In [None]:
!pip install gym[atari]
!pip install gym[accept-rom-license]



In [None]:
import random
import numpy as np
import torch
import torch.nn as nn
import gym
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve, gaussian

import os
import io
import base64
import time
import glob
from IPython.display import HTML
import torch.nn.functional as F
from gym.wrappers import AtariPreprocessing
from gym.wrappers import FrameStack
from gym.wrappers import TransformReward


In [None]:
def make_env(env_name, clip_rewards=True, seed=None):
    """Create a wrapped Atari environment ready for the DQN agent.

    The raw environment is passed through gym's AtariPreprocessing (default
    settings) and then FrameStack with 4 stacked frames.

    Args:
        env_name: id handed to ``gym.make``.
        clip_rewards: when true, every reward is replaced by its sign
            via a TransformReward wrapper.
        seed: when not None, forwarded to ``env.seed`` of the wrapped env.

    Returns:
        The wrapped environment.
    """
    wrapped = FrameStack(AtariPreprocessing(gym.make(env_name)), num_stack=4)

    if clip_rewards:
        # np.sign maps each reward to -1, 0 or +1
        wrapped = TransformReward(wrapped, np.sign)

    if seed is not None:
        wrapped.seed(seed)

    return wrapped

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [None]:
# Next we create a class DQNAgent which is the class containing the neural network, This class is derived from nn.Module

class DQNAgent(nn.Module):
    """Convolutional Q-network with an epsilon-greedy action sampler.

    The network is two conv+ReLU layers, a flatten, and two linear layers
    producing one Q-value per action. Sub-module names (conv1, linear3, ...)
    are part of the state_dict keys and are kept stable.
    """

    def __init__(self, state_shape, n_actions, epsilon=0):
        """
        Args:
            state_shape: shape of a single state, channels first; the first
                entry is used as the number of input channels.
            n_actions: number of discrete actions (size of the output layer).
            epsilon: probability of picking a random action in sample_actions.
        """
        super().__init__()
        self.epsilon = epsilon
        self.n_actions = n_actions
        self.state_shape = state_shape

        in_channels = state_shape[0]

        self.network = nn.Sequential()
        self.network.add_module('conv1', nn.Conv2d(in_channels, 16, kernel_size=8, stride=4))
        self.network.add_module('relu1', nn.ReLU())
        self.network.add_module('conv2', nn.Conv2d(16, 32, kernel_size=4, stride=2))
        self.network.add_module('relu2', nn.ReLU())
        self.network.add_module('flatten', nn.Flatten())

        # Infer the flattened feature size from a dummy forward pass instead of
        # hard-coding it (it is 2592 for a (4, 84, 84) state).
        with torch.no_grad():
            n_flat = self.network(torch.zeros(1, *state_shape)).shape[1]

        self.network.add_module('linear3', nn.Linear(n_flat, 256))
        self.network.add_module('relu3', nn.ReLU())
        self.network.add_module('linear4', nn.Linear(256, n_actions))

        # nn.Module.parameters() already yields the parameters of self.network,
        # so the method is deliberately not overridden on the instance.

    def forward(self, state_t):
        """Return Q(s, a) for a batch of states given as a tensor."""
        return self.network(state_t)

    def get_qvalues(self, states):
        """Return Q-values as a numpy array for a batch of states.

        Args:
            states: batch of states as a numpy array (or anything numpy can
                convert); a torch tensor is accepted as well.

        Returns:
            numpy array with one row of Q-values per state.
        """
        # Use the device the weights live on so the input always matches the model
        model_device = next(self.network.parameters()).device
        if not torch.is_tensor(states):
            states = np.asarray(states)
        states = torch.as_tensor(states, dtype=torch.float32, device=model_device)
        with torch.no_grad():  # inference only, no autograd graph needed
            qvalues = self.forward(states)
        return qvalues.cpu().numpy()

    def sample_actions(self, qvalues):
        """Sample actions from Q-values with an epsilon-greedy policy.

        Args:
            qvalues: numpy array of Q-values, either one row per state or a
                single 1-D row (treated as a batch of size 1).

        Returns:
            numpy array with one action index per row of ``qvalues``.
        """
        epsilon = self.epsilon
        if qvalues.ndim == 1:
            qvalues = qvalues.reshape(1, -1)  # single state -> batch of size 1

        batch_size, n_actions = qvalues.shape
        random_actions = np.random.choice(n_actions, size=batch_size)
        best_actions = qvalues.argmax(axis=-1)
        # 1 with probability epsilon (explore), 0 otherwise (exploit)
        should_explore = np.random.choice([0, 1], batch_size, p=[1 - epsilon, epsilon])
        return np.where(should_explore, random_actions, best_actions)




def evaluate(env, agent, n_games=1, greedy=False, t_max=10000):
    """Play full games with the agent and return the mean total reward.

    Args:
        env: environment whose ``reset()`` returns a state and whose
            ``step(action)`` returns ``(next_state, reward, done, info)``.
        agent: object providing ``get_qvalues`` and ``sample_actions``.
        n_games: number of games to play.
        greedy: if true always take the argmax action, otherwise use the
            agent's own ``sample_actions``.
        t_max: maximum number of steps played in each game.

    Returns:
        Sum of all rewards over all games divided by ``n_games``.
    """
    total_reward = 0.0
    for _ in range(n_games):
        state = env.reset()
        for _ in range(t_max):
            # get_qvalues takes a numpy batch; add the batch axis here
            q_values = agent.get_qvalues(np.asarray(state, dtype=np.float32)[None])

            if greedy:
                action = int(q_values.argmax())
            else:
                action = agent.sample_actions(q_values)[0]

            state, reward, done, _ = env.step(action)
            total_reward += reward
            if done:
                break

    return total_reward / n_games

In [None]:
# Now we create a class ReplayBuffer. The object of this class is responsible for storing the buffer information based on the agent's action when we play the agent(i.e, current_State -> action -> next_state -> done_flag ->reward)
# For Deep Q Learning we sample information of size = 'batch_size' from the ReplayBuffer and return that information for training
# This buffer has a fixed size, set that to 10**6. remove previous information as new information is passed in the buffer


class ReplayBuffer:
    """Fixed-capacity store of transitions; once full, the oldest entry is overwritten."""

    def __init__(self, size):
        """Create an empty buffer that holds at most ``size`` transitions."""
        self.size = size
        self.buffer = []
        # index of the slot that the next overwrite will use
        self.position = 0

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

    def add(self, state, action, reward, next_state, done):
        """Store one transition as a single (state, action, reward, next_state, done) tuple."""
        transition = (state, action, reward, next_state, done)
        if len(self.buffer) >= self.size:
            self.buffer[self.position] = transition
        else:
            self.buffer.append(transition)
        self.position = (self.position + 1) % self.size

    def sample(self, batch_size):
        """Return a random list of stored transitions (without repetition).

        At most ``batch_size`` items are returned; fewer when the buffer
        holds fewer transitions than requested.
        """
        n_items = min(batch_size, len(self.buffer))
        return random.sample(self.buffer, n_items)


In [None]:
def play_and_record(start_state, agent, env, exp_replay, n_steps = 1):
    """Let the agent play for ``n_steps`` steps and store every transition.

    Args:
        start_state: state the environment is currently in.
        agent: Q-network; called on a batched state tensor and providing
            ``sample_actions`` and ``parameters``.
        env: environment whose ``step(action)`` returns
            ``(next_state, reward, done, info)``; it is reset whenever an
            episode ends.
        exp_replay: buffer with an ``add(state, action, reward, next_state, done)``
            method.
        n_steps: number of environment steps to play in this call.

    Returns:
        The state the environment is in after the last step. Pass it as
        ``start_state`` of the next call so that the recorded transitions
        stay consistent with the environment.
    """
    # Feed inputs on whatever device the agent's weights are on
    agent_device = next(agent.parameters()).device

    state = start_state
    for _ in range(n_steps):
        # Convert through numpy first: building a tensor from a list of frames is slow
        state_t = torch.as_tensor(
            np.asarray(state, dtype=np.float32), device=agent_device
        ).unsqueeze(0)
        with torch.no_grad():  # acting only, no gradients needed
            qvalues = agent(state_t)[0].cpu().numpy()

        action = agent.sample_actions(qvalues)[0]
        next_state, reward, done, _ = env.step(action)
        exp_replay.add(state, action, reward, next_state, done)

        state = env.reset() if done else next_state

    return state


def compute_td_loss(agent, target_network, device, batch_size, exp_replay ,gamma = 0.99,):
    """Sample a batch from the replay buffer and compute the DQN TD loss.

    ``agent`` is the online network that is being trained; ``target_network``
    is only used to build the targets and receives no gradient.

    Args:
        agent: online Q-network, called on a batch of states.
        target_network: Q-network used to evaluate the next states.
        device: torch device on which the batch tensors are created.
        batch_size: number of transitions requested from ``exp_replay``.
        exp_replay: buffer whose ``sample(batch_size)`` returns a list of
            ``(state, action, reward, next_state, done)`` tuples.
        gamma: discount factor.

    Returns:
        Scalar tensor: mean squared error between Q(s, a) predicted by
        ``agent`` and ``reward + gamma * max_a' Q_target(s', a')``, where the
        bootstrap term is dropped for transitions with ``done`` set.

    Raises:
        ValueError: if the replay buffer returns an empty batch.
    """
    batch = exp_replay.sample(batch_size)
    if not batch:
        raise ValueError("cannot compute TD loss: replay buffer returned an empty batch")
    states, actions, rewards, next_states, dones = zip(*batch)

    def _stack(frames):
        # Stack in numpy first; torch.tensor on a tuple of arrays is very slow
        stacked = np.stack([np.asarray(frame, dtype=np.float32) for frame in frames])
        return torch.as_tensor(stacked, device=device)

    states = _stack(states)
    next_states = _stack(next_states)
    actions = torch.as_tensor(np.asarray(actions, dtype=np.int64), device=device)
    rewards = torch.as_tensor(np.asarray(rewards, dtype=np.float32), device=device)
    # 1.0 while the episode continues, 0.0 on terminal transitions
    not_done = 1.0 - torch.as_tensor(np.asarray(dones, dtype=np.float32), device=device)

    # Q-values of the actions that were actually taken
    predicted_qvalues = agent(states).gather(1, actions.unsqueeze(1)).squeeze(1)

    # Targets come from the target network and must not propagate gradients
    with torch.no_grad():
        best_next_qvalues = target_network(next_states).max(dim=1)[0]
        target_qvalues_of_actions = rewards + gamma * best_next_qvalues * not_done

    return torch.nn.functional.mse_loss(predicted_qvalues, target_qvalues_of_actions)


In [None]:
############# MAIN LOOP ###############
from tqdm import trange
from IPython.display import clear_output
import matplotlib.pyplot as plt
import torch.optim as optim

# One seed shared by every random number generator used in this notebook
seed = 108
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)


<torch._C.Generator at 0x78d772380f30>

In [None]:

# Set up the training environment with make_env and read its action and
# observation spaces. The environment is seeded with the same `seed` as the
# random number generators so that runs are reproducible.
# The environment must be reset before training starts and every time a game
# ends (signalled by the boolean `done` flag returned by env.step).
env_name = "BreakoutNoFrameskip-v4"
env = make_env(env_name, seed=seed)
action_space = env.action_space
observation_space = env.observation_space
state_shape = observation_space.shape
n_actions = action_space.n
state = env.reset()
done = False

  deprecation(
  deprecation(
  logger.deprecation(


In [None]:
# Online network (agent): the one that plays and is trained.
# Target network: a copy of the agent that is refreshed from it every
# refresh_target_network_freq training steps.
# Both are moved to `device`, because the states fed to them are created on
# `device` as well.
agent = DQNAgent(observation_space.shape, action_space.n, epsilon=0.1).to(device)
target_network = DQNAgent(observation_space.shape, action_space.n, epsilon=0.1).to(device)

# Start the target network from the agent's weights
target_network.load_state_dict(agent.state_dict())

# TODO


<All keys matched successfully>

In [None]:
# Create the replay buffer and pre-fill it with transitions collected by a
# fully random policy, so that the first training batches are not drawn from a
# nearly empty buffer.
exp_replay = ReplayBuffer(10**6)

training_epsilon = agent.epsilon
agent.epsilon = 1.0  # epsilon = 1 makes sample_actions pick every action at random
try:
    for i in range(4000):
        # play_and_record may return the state to resume from; if it returns
        # None, keep the current one.
        resumed_state = play_and_record(state, agent, env, exp_replay, n_steps=10**2)
        if resumed_state is not None:
            state = resumed_state
        if i % 100 == 0:  # report progress occasionally instead of on every iteration
            print( "Replay Buffer : i : ", i)
        if len(exp_replay) >= 10**6:
            break
finally:
    agent.epsilon = training_epsilon
print(len(exp_replay))



In [None]:
# Training schedule
batch_size = 32                 # transitions requested from the replay buffer per update
timesteps_per_epoch = 2         # environment steps played before each update
total_steps = 2 * 1_000_000     # number of iterations of the main loop

# Adam optimiser over the online network's parameters, learning rate 2e-5
optimizer = torch.optim.Adam(agent.parameters(), lr=2 * 1e-5)


In [None]:
# Exploration: epsilon goes from start_epsilon to end_epsilon over the first
# eps_decay_final_step steps
start_epsilon, end_epsilon = 0.1, 0.05
eps_decay_final_step = 10**5

# How often (in steps) to log the loss, refresh the target network and evaluate
loss_freq = 20
refresh_target_network_freq = 100
eval_freq = 10000

# Upper bound used when clipping the gradient norm
max_grad_norm = 5000

# Histories filled in by the training loop
td_loss_history = []
mean_rw_history = []

# How often (in steps) the model is saved
SAVE_INTERVAL = 50000

from numpy import asarray
from numpy import savetxt


def epsilon_schedule(start_eps, end_eps, step, final_step):
    """Linearly interpolate epsilon from ``start_eps`` to ``end_eps``.

    ``step`` is capped at ``final_step``, so the returned value stays at
    ``end_eps`` once ``final_step`` has been reached.
    """
    clipped_step = min(step, final_step)
    span = end_eps - start_eps
    return start_eps + span * clipped_step / final_step




In [None]:
# Reset the environment before training starts
state = env.reset()

## MAIN LOOP STARTING
for step in trange(total_steps + 1):

    # Update the exploration probability (epsilon) as training progresses
    agent.epsilon = epsilon_schedule(start_epsilon, end_epsilon, step, eps_decay_final_step)

    # Play timesteps_per_epoch steps and add them to the replay buffer.
    # play_and_record may return the state to resume from; carrying it over
    # keeps the recorded transitions in sync with the environment. If it
    # returns None, keep the current state.
    resumed_state = play_and_record(state, agent, env, exp_replay, n_steps=timesteps_per_epoch)
    if resumed_state is not None:
        state = resumed_state

    # TD loss on a batch sampled from the replay buffer
    loss = compute_td_loss(agent, target_network, device=device, batch_size=batch_size, exp_replay=exp_replay, gamma=0.99)

    # Back-propagate and update the online network only; the optimizer holds
    # just the agent's parameters, the target network is refreshed below.
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(agent.parameters(), max_grad_norm)  # clip gradients to avoid exploding gradients
    optimizer.step()

    if step % loss_freq == 0:
        td_loss_history.append(loss.item())

    if step % refresh_target_network_freq == 0:
        # Copy the agent's weights into the target network
        target_network.load_state_dict(agent.state_dict())

    if step % eval_freq == 0:
        # Evaluate on unclipped rewards so the logged value is the real game
        # score, and close the evaluation environment when done.
        eval_env = make_env(env_name, clip_rewards=False, seed=step)
        try:
            mean_reward = evaluate(eval_env, agent, n_games=3, greedy=True, t_max=6000)
        finally:
            eval_env.close()
        mean_rw_history.append(mean_reward)

        # Report only when there is a new evaluation result; clearing first
        # keeps the cell output short.
        clear_output(True)
        print("buffer size = %i, epsilon = %.5f" %
                (len(exp_replay), agent.epsilon))
        print("mean_reward : ", mean_reward)

    if step % SAVE_INTERVAL == 0 and step != 0:
        print('Saving...')
        torch.save(agent.state_dict(), f'model_{step}.pth')
        # Save the reward history together with the checkpoint rather than
        # writing a new file on every step
        savetxt(f'reward_{step}.csv', np.array(mean_rw_history))



# savetxt('reward_final.csv', np.array(mean_rw_history))

# Final evaluation: one greedy game on unclipped rewards, so that the printed
# value is the real game score. The environment is closed afterwards.
final_env = make_env(env_name, clip_rewards=False)
try:
    final_score = evaluate(final_env, agent, n_games=1, greedy=True, t_max=10000)
finally:
    final_env.close()
print('final score:', final_score)




  states = torch.tensor(states, device=device, dtype=torch.float32)


mean_reward :  0.0


  0%|          | 1/2000001 [02:58<99087:46:44, 178.36s/it]

buffer size = 402, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 2/2000001 [02:58<40951:26:43, 73.71s/it] 

buffer size = 404, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 3/2000001 [02:59<22354:58:29, 40.24s/it]

buffer size = 406, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 4/2000001 [02:59<13624:38:22, 24.52s/it]

buffer size = 408, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 5/2000001 [03:00<8792:56:48, 15.83s/it] 

buffer size = 410, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 6/2000001 [03:00<5881:17:46, 10.59s/it]

buffer size = 412, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 7/2000001 [03:00<4034:53:07,  7.26s/it]

buffer size = 414, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 8/2000001 [03:01<2827:11:04,  5.09s/it]

buffer size = 416, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 9/2000001 [03:01<2017:30:15,  3.63s/it]

buffer size = 418, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 10/2000001 [03:02<1463:38:15,  2.63s/it]

buffer size = 420, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 11/2000001 [03:02<1085:06:45,  1.95s/it]

buffer size = 422, epsilon = 0.10000
mean_reward :  0.0


  0%|          | 12/2000001 [03:02<827:02:37,  1.49s/it] 

buffer size = 424, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 13/2000001 [03:03<646:04:13,  1.16s/it]

buffer size = 426, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 14/2000001 [03:03<523:50:01,  1.06it/s]

buffer size = 428, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 15/2000001 [03:04<436:59:05,  1.27it/s]

buffer size = 430, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 16/2000001 [03:04<375:42:44,  1.48it/s]

buffer size = 432, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 17/2000001 [03:05<333:40:34,  1.66it/s]

buffer size = 434, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 18/2000001 [03:05<304:29:18,  1.82it/s]

buffer size = 436, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 19/2000001 [03:05<284:06:05,  1.96it/s]

buffer size = 438, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 20/2000001 [03:06<271:24:09,  2.05it/s]

buffer size = 440, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 21/2000001 [03:06<262:51:02,  2.11it/s]

buffer size = 442, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 22/2000001 [03:07<272:59:04,  2.04it/s]

buffer size = 444, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 23/2000001 [03:08<326:12:57,  1.70it/s]

buffer size = 446, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 24/2000001 [03:08<364:29:18,  1.52it/s]

buffer size = 448, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 25/2000001 [03:09<385:02:27,  1.44it/s]

buffer size = 450, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 26/2000001 [03:10<369:42:22,  1.50it/s]

buffer size = 452, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 27/2000001 [03:10<328:38:10,  1.69it/s]

buffer size = 454, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 28/2000001 [03:11<299:19:21,  1.86it/s]

buffer size = 456, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 29/2000001 [03:11<277:50:57,  2.00it/s]

buffer size = 458, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 30/2000001 [03:12<262:36:30,  2.12it/s]

buffer size = 460, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 31/2000001 [03:12<251:38:28,  2.21it/s]

buffer size = 462, epsilon = 0.09999
mean_reward :  0.0


  0%|          | 32/2000001 [03:12<243:50:14,  2.28it/s]

buffer size = 464, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 33/2000001 [03:13<242:38:31,  2.29it/s]

buffer size = 466, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 34/2000001 [03:13<241:51:59,  2.30it/s]

buffer size = 468, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 35/2000001 [03:14<239:31:59,  2.32it/s]

buffer size = 470, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 36/2000001 [03:14<234:59:08,  2.36it/s]

buffer size = 472, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 37/2000001 [03:14<233:48:35,  2.38it/s]

buffer size = 474, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 38/2000001 [03:15<234:23:16,  2.37it/s]

buffer size = 476, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 39/2000001 [03:15<234:10:01,  2.37it/s]

buffer size = 478, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 40/2000001 [03:16<233:23:44,  2.38it/s]

buffer size = 480, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 41/2000001 [03:16<230:44:45,  2.41it/s]

buffer size = 482, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 42/2000001 [03:17<228:51:03,  2.43it/s]

buffer size = 484, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 43/2000001 [03:17<229:54:13,  2.42it/s]

buffer size = 486, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 44/2000001 [03:17<230:20:23,  2.41it/s]

buffer size = 488, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 45/2000001 [03:18<229:20:46,  2.42it/s]

buffer size = 490, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 46/2000001 [03:18<228:03:56,  2.44it/s]

buffer size = 492, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 47/2000001 [03:19<226:55:52,  2.45it/s]

buffer size = 494, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 48/2000001 [03:19<229:07:08,  2.42it/s]

buffer size = 496, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 49/2000001 [03:19<230:11:20,  2.41it/s]

buffer size = 498, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 50/2000001 [03:20<251:25:39,  2.21it/s]

buffer size = 500, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 51/2000001 [03:21<305:53:51,  1.82it/s]

buffer size = 502, epsilon = 0.09998
mean_reward :  0.0


  0%|          | 52/2000001 [03:22<350:44:11,  1.58it/s]

buffer size = 504, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 53/2000001 [03:22<375:56:56,  1.48it/s]

buffer size = 506, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 54/2000001 [03:23<363:47:00,  1.53it/s]

buffer size = 508, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 55/2000001 [03:23<324:30:30,  1.71it/s]

buffer size = 510, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 56/2000001 [03:24<295:37:29,  1.88it/s]

buffer size = 512, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 57/2000001 [03:24<278:58:58,  1.99it/s]

buffer size = 514, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 58/2000001 [03:25<264:55:33,  2.10it/s]

buffer size = 516, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 59/2000001 [03:25<256:10:08,  2.17it/s]

buffer size = 518, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 60/2000001 [03:25<248:57:14,  2.23it/s]

buffer size = 520, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 61/2000001 [03:26<242:57:54,  2.29it/s]

buffer size = 522, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 62/2000001 [03:26<240:35:11,  2.31it/s]

buffer size = 524, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 63/2000001 [03:27<239:10:06,  2.32it/s]

buffer size = 526, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 64/2000001 [03:27<239:23:42,  2.32it/s]

buffer size = 528, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 65/2000001 [03:28<236:07:27,  2.35it/s]

buffer size = 530, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 66/2000001 [03:28<233:58:25,  2.37it/s]

buffer size = 532, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 67/2000001 [03:28<233:58:24,  2.37it/s]

buffer size = 534, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 68/2000001 [03:29<232:04:59,  2.39it/s]

buffer size = 536, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 69/2000001 [03:29<230:40:07,  2.41it/s]

buffer size = 538, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 70/2000001 [03:30<227:41:57,  2.44it/s]

buffer size = 540, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 71/2000001 [03:30<229:25:25,  2.42it/s]

buffer size = 542, epsilon = 0.09997
mean_reward :  0.0


  0%|          | 72/2000001 [03:30<231:46:38,  2.40it/s]

buffer size = 544, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 73/2000001 [03:31<231:48:21,  2.40it/s]

buffer size = 546, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 74/2000001 [03:31<230:56:30,  2.41it/s]

buffer size = 548, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 75/2000001 [03:32<228:38:51,  2.43it/s]

buffer size = 550, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 76/2000001 [03:32<230:39:25,  2.41it/s]

buffer size = 552, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 77/2000001 [03:33<230:28:24,  2.41it/s]

buffer size = 554, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 78/2000001 [03:33<259:08:38,  2.14it/s]

buffer size = 556, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 79/2000001 [03:34<317:54:21,  1.75it/s]

buffer size = 558, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 80/2000001 [03:35<356:52:33,  1.56it/s]

buffer size = 560, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 81/2000001 [03:36<377:25:02,  1.47it/s]

buffer size = 562, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 82/2000001 [03:36<362:02:34,  1.53it/s]

buffer size = 564, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 83/2000001 [03:37<323:54:50,  1.72it/s]

buffer size = 566, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 84/2000001 [03:37<296:24:56,  1.87it/s]

buffer size = 568, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 85/2000001 [03:37<275:31:19,  2.02it/s]

buffer size = 570, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 86/2000001 [03:38<262:00:24,  2.12it/s]

buffer size = 572, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 87/2000001 [03:38<252:46:59,  2.20it/s]

buffer size = 574, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 88/2000001 [03:39<247:55:14,  2.24it/s]

buffer size = 576, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 89/2000001 [03:39<243:37:10,  2.28it/s]

buffer size = 578, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 90/2000001 [03:39<240:44:57,  2.31it/s]

buffer size = 580, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 91/2000001 [03:40<237:52:16,  2.34it/s]

buffer size = 582, epsilon = 0.09996
mean_reward :  0.0


  0%|          | 92/2000001 [03:40<233:45:47,  2.38it/s]

buffer size = 584, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 93/2000001 [03:41<234:39:42,  2.37it/s]

buffer size = 586, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 94/2000001 [03:41<233:03:18,  2.38it/s]

buffer size = 588, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 95/2000001 [03:42<230:44:39,  2.41it/s]

buffer size = 590, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 96/2000001 [03:42<230:10:13,  2.41it/s]

buffer size = 592, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 97/2000001 [03:42<229:42:11,  2.42it/s]

buffer size = 594, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 98/2000001 [03:43<232:17:26,  2.39it/s]

buffer size = 596, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 99/2000001 [03:43<238:17:38,  2.33it/s]

buffer size = 598, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 100/2000001 [03:44<235:23:03,  2.36it/s]

buffer size = 600, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 101/2000001 [03:44<235:29:09,  2.36it/s]

buffer size = 602, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 102/2000001 [03:44<233:35:18,  2.38it/s]

buffer size = 604, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 103/2000001 [03:45<233:41:51,  2.38it/s]

buffer size = 606, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 104/2000001 [03:45<233:50:05,  2.38it/s]

buffer size = 608, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 105/2000001 [03:46<234:12:16,  2.37it/s]

buffer size = 610, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 106/2000001 [03:46<275:13:24,  2.02it/s]

buffer size = 612, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 107/2000001 [03:47<327:31:44,  1.70it/s]

buffer size = 614, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 108/2000001 [03:48<367:34:08,  1.51it/s]

buffer size = 616, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 109/2000001 [03:49<393:04:01,  1.41it/s]

buffer size = 618, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 110/2000001 [03:49<342:32:42,  1.62it/s]

buffer size = 620, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 111/2000001 [03:50<308:47:15,  1.80it/s]

buffer size = 622, epsilon = 0.09995
mean_reward :  0.0


  0%|          | 112/2000001 [03:50<286:02:51,  1.94it/s]

buffer size = 624, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 113/2000001 [03:50<268:38:09,  2.07it/s]

buffer size = 626, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 114/2000001 [03:51<260:39:05,  2.13it/s]

buffer size = 628, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 115/2000001 [03:51<251:14:45,  2.21it/s]

buffer size = 630, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 116/2000001 [03:52<245:44:50,  2.26it/s]

buffer size = 632, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 117/2000001 [03:52<244:07:10,  2.28it/s]

buffer size = 634, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 118/2000001 [03:53<239:10:10,  2.32it/s]

buffer size = 636, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 119/2000001 [03:53<238:49:34,  2.33it/s]

buffer size = 638, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 120/2000001 [03:53<235:22:51,  2.36it/s]

buffer size = 640, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 121/2000001 [03:54<235:29:24,  2.36it/s]

buffer size = 642, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 122/2000001 [03:54<237:20:25,  2.34it/s]

buffer size = 644, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 123/2000001 [03:55<235:16:43,  2.36it/s]

buffer size = 646, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 124/2000001 [03:55<235:01:12,  2.36it/s]

buffer size = 648, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 125/2000001 [03:56<231:47:49,  2.40it/s]

buffer size = 650, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 126/2000001 [03:56<231:10:37,  2.40it/s]

buffer size = 652, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 127/2000001 [03:56<231:46:23,  2.40it/s]

buffer size = 654, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 128/2000001 [03:57<232:05:20,  2.39it/s]

buffer size = 656, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 129/2000001 [03:57<235:18:59,  2.36it/s]

buffer size = 658, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 130/2000001 [03:58<232:21:20,  2.39it/s]

buffer size = 660, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 131/2000001 [03:58<232:18:49,  2.39it/s]

buffer size = 662, epsilon = 0.09994
mean_reward :  0.0


  0%|          | 132/2000001 [03:58<232:02:48,  2.39it/s]

buffer size = 664, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 133/2000001 [03:59<236:26:29,  2.35it/s]

buffer size = 666, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 134/2000001 [04:00<295:18:17,  1.88it/s]

buffer size = 668, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 135/2000001 [04:00<339:44:06,  1.64it/s]

buffer size = 670, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 136/2000001 [04:01<370:29:43,  1.50it/s]

buffer size = 672, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 137/2000001 [04:02<387:29:35,  1.43it/s]

buffer size = 674, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 138/2000001 [04:02<342:51:44,  1.62it/s]

buffer size = 676, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 139/2000001 [04:03<311:31:19,  1.78it/s]

buffer size = 678, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 140/2000001 [04:03<289:27:48,  1.92it/s]

buffer size = 680, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 141/2000001 [04:04<274:39:43,  2.02it/s]

buffer size = 682, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 142/2000001 [04:04<263:50:43,  2.11it/s]

buffer size = 684, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 143/2000001 [04:05<260:29:14,  2.13it/s]

buffer size = 686, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 144/2000001 [04:05<251:00:01,  2.21it/s]

buffer size = 688, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 145/2000001 [04:06<246:08:04,  2.26it/s]

buffer size = 690, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 146/2000001 [04:06<241:36:04,  2.30it/s]

buffer size = 692, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 147/2000001 [04:06<238:31:21,  2.33it/s]

buffer size = 694, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 148/2000001 [04:07<239:24:53,  2.32it/s]

buffer size = 696, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 149/2000001 [04:07<235:16:53,  2.36it/s]

buffer size = 698, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 150/2000001 [04:08<234:05:07,  2.37it/s]

buffer size = 700, epsilon = 0.09993
mean_reward :  0.0


  0%|          | 151/2000001 [04:08<234:50:15,  2.37it/s]

buffer size = 702, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 152/2000001 [04:08<232:44:20,  2.39it/s]

buffer size = 704, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 153/2000001 [04:09<235:49:11,  2.36it/s]

buffer size = 706, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 154/2000001 [04:09<240:25:07,  2.31it/s]

buffer size = 708, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 155/2000001 [04:10<240:41:41,  2.31it/s]

buffer size = 710, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 156/2000001 [04:10<238:22:19,  2.33it/s]

buffer size = 712, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 157/2000001 [04:11<236:39:09,  2.35it/s]

buffer size = 714, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 158/2000001 [04:11<236:25:11,  2.35it/s]

buffer size = 716, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 159/2000001 [04:11<234:48:31,  2.37it/s]

buffer size = 718, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 160/2000001 [04:12<237:39:22,  2.34it/s]

buffer size = 720, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 161/2000001 [04:13<274:54:33,  2.02it/s]

buffer size = 722, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 162/2000001 [04:13<327:12:43,  1.70it/s]

buffer size = 724, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 163/2000001 [04:14<361:36:10,  1.54it/s]

buffer size = 726, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 164/2000001 [04:15<385:21:30,  1.44it/s]

buffer size = 728, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 165/2000001 [04:15<360:49:57,  1.54it/s]

buffer size = 730, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 166/2000001 [04:16<322:39:24,  1.72it/s]

buffer size = 732, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 167/2000001 [04:16<293:50:07,  1.89it/s]

buffer size = 734, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 168/2000001 [04:17<272:47:57,  2.04it/s]

buffer size = 736, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 169/2000001 [04:17<263:59:35,  2.10it/s]

buffer size = 738, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 170/2000001 [04:18<252:00:57,  2.20it/s]

buffer size = 740, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 171/2000001 [04:18<247:29:50,  2.24it/s]

buffer size = 742, epsilon = 0.09992
mean_reward :  0.0


  0%|          | 172/2000001 [04:18<242:02:32,  2.30it/s]

buffer size = 744, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 173/2000001 [04:19<239:22:26,  2.32it/s]

buffer size = 746, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 174/2000001 [04:19<240:09:50,  2.31it/s]

buffer size = 748, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 175/2000001 [04:20<238:27:39,  2.33it/s]

buffer size = 750, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 176/2000001 [04:20<240:29:42,  2.31it/s]

buffer size = 752, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 177/2000001 [04:20<235:22:27,  2.36it/s]

buffer size = 754, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 178/2000001 [04:21<237:29:51,  2.34it/s]

buffer size = 756, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 179/2000001 [04:21<238:16:28,  2.33it/s]

buffer size = 758, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 180/2000001 [04:22<236:31:06,  2.35it/s]

buffer size = 760, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 181/2000001 [04:22<234:34:33,  2.37it/s]

buffer size = 762, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 182/2000001 [04:23<232:38:22,  2.39it/s]

buffer size = 764, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 183/2000001 [04:23<235:11:31,  2.36it/s]

buffer size = 766, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 184/2000001 [04:23<233:31:55,  2.38it/s]

buffer size = 768, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 185/2000001 [04:24<234:05:09,  2.37it/s]

buffer size = 770, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 186/2000001 [04:24<236:02:25,  2.35it/s]

buffer size = 772, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 187/2000001 [04:25<232:29:55,  2.39it/s]

buffer size = 774, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 188/2000001 [04:25<235:30:18,  2.36it/s]

buffer size = 776, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 189/2000001 [04:26<285:53:08,  1.94it/s]

buffer size = 778, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 190/2000001 [04:27<333:24:22,  1.67it/s]

buffer size = 780, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 191/2000001 [04:28<369:57:16,  1.50it/s]

buffer size = 782, epsilon = 0.09991
mean_reward :  0.0


  0%|          | 192/2000001 [04:28<389:18:21,  1.43it/s]

buffer size = 784, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 193/2000001 [04:29<341:37:27,  1.63it/s]

buffer size = 786, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 194/2000001 [04:29<308:13:30,  1.80it/s]

buffer size = 788, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 195/2000001 [04:30<286:16:41,  1.94it/s]

buffer size = 790, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 196/2000001 [04:30<270:20:50,  2.05it/s]

buffer size = 792, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 197/2000001 [04:30<260:01:16,  2.14it/s]

buffer size = 794, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 198/2000001 [04:31<251:26:30,  2.21it/s]

buffer size = 796, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 199/2000001 [04:31<243:43:45,  2.28it/s]

buffer size = 798, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 200/2000001 [04:32<241:00:07,  2.30it/s]

buffer size = 800, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 201/2000001 [04:32<238:58:34,  2.32it/s]

buffer size = 802, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 202/2000001 [04:32<238:11:46,  2.33it/s]

buffer size = 804, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 203/2000001 [04:33<236:25:15,  2.35it/s]

buffer size = 806, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 204/2000001 [04:33<233:45:38,  2.38it/s]

buffer size = 808, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 205/2000001 [04:34<234:17:17,  2.37it/s]

buffer size = 810, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 206/2000001 [04:34<233:24:05,  2.38it/s]

buffer size = 812, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 207/2000001 [04:35<236:38:31,  2.35it/s]

buffer size = 814, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 208/2000001 [04:35<235:41:01,  2.36it/s]

buffer size = 816, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 209/2000001 [04:35<233:31:21,  2.38it/s]

buffer size = 818, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 210/2000001 [04:36<232:38:35,  2.39it/s]

buffer size = 820, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 211/2000001 [04:36<233:39:08,  2.38it/s]

buffer size = 822, epsilon = 0.09990
mean_reward :  0.0


  0%|          | 212/2000001 [04:37<234:09:26,  2.37it/s]

buffer size = 824, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 213/2000001 [04:37<236:27:29,  2.35it/s]

buffer size = 826, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 214/2000001 [04:38<235:11:37,  2.36it/s]

buffer size = 828, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 215/2000001 [04:38<237:00:15,  2.34it/s]

buffer size = 830, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 216/2000001 [04:38<247:57:35,  2.24it/s]

buffer size = 832, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 217/2000001 [04:39<305:11:45,  1.82it/s]

buffer size = 834, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 218/2000001 [04:40<349:28:41,  1.59it/s]

buffer size = 836, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 219/2000001 [04:41<385:46:57,  1.44it/s]

buffer size = 838, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 220/2000001 [04:42<369:55:14,  1.50it/s]

buffer size = 840, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 221/2000001 [04:42<331:54:58,  1.67it/s]

buffer size = 842, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 222/2000001 [04:42<302:57:07,  1.83it/s]

buffer size = 844, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 223/2000001 [04:43<286:04:57,  1.94it/s]

buffer size = 846, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 224/2000001 [04:43<273:33:07,  2.03it/s]

buffer size = 848, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 225/2000001 [04:44<260:21:49,  2.13it/s]

buffer size = 850, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 226/2000001 [04:44<253:10:57,  2.19it/s]

buffer size = 852, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 227/2000001 [04:45<246:15:32,  2.26it/s]

buffer size = 854, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 228/2000001 [04:45<244:42:33,  2.27it/s]

buffer size = 856, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 229/2000001 [04:45<241:23:57,  2.30it/s]

buffer size = 858, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 230/2000001 [04:46<237:16:40,  2.34it/s]

buffer size = 860, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 231/2000001 [04:46<237:56:54,  2.33it/s]

buffer size = 862, epsilon = 0.09989
mean_reward :  0.0


  0%|          | 232/2000001 [04:47<234:00:10,  2.37it/s]

buffer size = 864, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 233/2000001 [04:47<236:02:07,  2.35it/s]

buffer size = 866, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 234/2000001 [04:47<233:45:39,  2.38it/s]

buffer size = 868, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 235/2000001 [04:48<235:21:13,  2.36it/s]

buffer size = 870, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 236/2000001 [04:48<234:19:42,  2.37it/s]

buffer size = 872, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 237/2000001 [04:49<233:09:24,  2.38it/s]

buffer size = 874, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 238/2000001 [04:49<234:19:06,  2.37it/s]

buffer size = 876, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 239/2000001 [04:50<235:52:08,  2.36it/s]

buffer size = 878, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 240/2000001 [04:50<237:08:19,  2.34it/s]

buffer size = 880, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 241/2000001 [04:50<235:01:16,  2.36it/s]

buffer size = 882, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 242/2000001 [04:51<235:16:53,  2.36it/s]

buffer size = 884, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 243/2000001 [04:51<237:03:18,  2.34it/s]

buffer size = 886, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 244/2000001 [04:52<296:08:30,  1.88it/s]

buffer size = 888, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 245/2000001 [04:53<338:40:37,  1.64it/s]

buffer size = 890, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 246/2000001 [04:54<367:52:32,  1.51it/s]

buffer size = 892, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 247/2000001 [04:54<392:53:35,  1.41it/s]

buffer size = 894, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 248/2000001 [04:55<347:13:57,  1.60it/s]

buffer size = 896, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 249/2000001 [04:55<314:55:58,  1.76it/s]

buffer size = 898, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 250/2000001 [04:56<289:01:44,  1.92it/s]

buffer size = 900, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 251/2000001 [04:56<273:01:05,  2.03it/s]

buffer size = 902, epsilon = 0.09988
mean_reward :  0.0


  0%|          | 252/2000001 [04:57<261:33:32,  2.12it/s]

buffer size = 904, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 253/2000001 [04:57<254:04:01,  2.19it/s]

buffer size = 906, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 254/2000001 [04:57<248:02:22,  2.24it/s]

buffer size = 908, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 255/2000001 [04:58<243:35:07,  2.28it/s]

buffer size = 910, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 256/2000001 [04:58<242:13:57,  2.29it/s]

buffer size = 912, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 257/2000001 [04:59<239:55:38,  2.32it/s]

buffer size = 914, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 258/2000001 [04:59<238:49:59,  2.33it/s]

buffer size = 916, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 259/2000001 [05:00<237:18:15,  2.34it/s]

buffer size = 918, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 260/2000001 [05:00<234:55:24,  2.36it/s]

buffer size = 920, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 261/2000001 [05:00<235:38:49,  2.36it/s]

buffer size = 922, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 262/2000001 [05:01<233:51:23,  2.38it/s]

buffer size = 924, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 263/2000001 [05:01<233:19:32,  2.38it/s]

buffer size = 926, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 264/2000001 [05:02<233:49:29,  2.38it/s]

buffer size = 928, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 265/2000001 [05:02<234:54:06,  2.36it/s]

buffer size = 930, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 266/2000001 [05:02<236:55:13,  2.34it/s]

buffer size = 932, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 267/2000001 [05:03<235:26:05,  2.36it/s]

buffer size = 934, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 268/2000001 [05:03<235:51:16,  2.36it/s]

buffer size = 936, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 269/2000001 [05:04<236:04:38,  2.35it/s]

buffer size = 938, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 270/2000001 [05:04<235:49:49,  2.36it/s]

buffer size = 940, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 271/2000001 [05:05<252:11:53,  2.20it/s]

buffer size = 942, epsilon = 0.09987
mean_reward :  0.0


  0%|          | 272/2000001 [05:05<308:00:43,  1.80it/s]

buffer size = 944, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 273/2000001 [05:06<347:02:39,  1.60it/s]

buffer size = 946, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 274/2000001 [05:07<376:04:24,  1.48it/s]

buffer size = 948, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 275/2000001 [05:08<377:15:06,  1.47it/s]

buffer size = 950, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 276/2000001 [05:08<334:19:58,  1.66it/s]

buffer size = 952, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 277/2000001 [05:09<304:27:42,  1.82it/s]

buffer size = 954, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 278/2000001 [05:09<284:01:11,  1.96it/s]

buffer size = 956, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 279/2000001 [05:09<268:36:11,  2.07it/s]

buffer size = 958, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 280/2000001 [05:10<263:26:41,  2.11it/s]

buffer size = 960, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 281/2000001 [05:10<253:45:34,  2.19it/s]

buffer size = 962, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 282/2000001 [05:11<248:03:51,  2.24it/s]

buffer size = 964, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 283/2000001 [05:11<243:58:23,  2.28it/s]

buffer size = 966, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 284/2000001 [05:12<237:51:02,  2.34it/s]

buffer size = 968, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 285/2000001 [05:12<238:01:37,  2.33it/s]

buffer size = 970, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 286/2000001 [05:12<237:34:53,  2.34it/s]

buffer size = 972, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 287/2000001 [05:13<235:42:20,  2.36it/s]

buffer size = 974, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 288/2000001 [05:13<240:45:42,  2.31it/s]

buffer size = 976, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 289/2000001 [05:14<242:55:59,  2.29it/s]

buffer size = 978, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 290/2000001 [05:14<238:52:03,  2.33it/s]

buffer size = 980, epsilon = 0.09986
mean_reward :  0.0


  0%|          | 291/2000001 [05:15<235:16:02,  2.36it/s]

buffer size = 982, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 292/2000001 [05:15<236:26:33,  2.35it/s]

buffer size = 984, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 293/2000001 [05:15<236:08:51,  2.35it/s]

buffer size = 986, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 294/2000001 [05:16<235:37:15,  2.36it/s]

buffer size = 988, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 295/2000001 [05:16<234:47:46,  2.37it/s]

buffer size = 990, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 296/2000001 [05:17<231:18:35,  2.40it/s]

buffer size = 992, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 297/2000001 [05:17<235:04:40,  2.36it/s]

buffer size = 994, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 298/2000001 [05:18<235:17:38,  2.36it/s]

buffer size = 996, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 299/2000001 [05:18<276:30:26,  2.01it/s]

buffer size = 998, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 300/2000001 [05:19<327:06:46,  1.70it/s]

buffer size = 1000, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 301/2000001 [05:20<360:35:59,  1.54it/s]

buffer size = 1002, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 302/2000001 [05:21<385:56:24,  1.44it/s]

buffer size = 1004, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 303/2000001 [05:21<354:03:28,  1.57it/s]

buffer size = 1006, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 304/2000001 [05:22<318:17:01,  1.75it/s]

buffer size = 1008, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 305/2000001 [05:22<292:22:56,  1.90it/s]

buffer size = 1010, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 306/2000001 [05:22<275:55:57,  2.01it/s]

buffer size = 1012, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 307/2000001 [05:23<261:55:54,  2.12it/s]

buffer size = 1014, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 308/2000001 [05:23<256:46:30,  2.16it/s]

buffer size = 1016, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 309/2000001 [05:24<248:10:04,  2.24it/s]

buffer size = 1018, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 310/2000001 [05:24<246:35:20,  2.25it/s]

buffer size = 1020, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 311/2000001 [05:24<242:50:36,  2.29it/s]

buffer size = 1022, epsilon = 0.09985
mean_reward :  0.0


  0%|          | 312/2000001 [05:25<241:28:25,  2.30it/s]

buffer size = 1024, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 313/2000001 [05:25<242:07:00,  2.29it/s]

buffer size = 1026, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 314/2000001 [05:26<238:02:29,  2.33it/s]

buffer size = 1028, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 315/2000001 [05:26<238:50:59,  2.33it/s]

buffer size = 1030, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 316/2000001 [05:27<236:29:01,  2.35it/s]

buffer size = 1032, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 317/2000001 [05:27<235:34:57,  2.36it/s]

buffer size = 1034, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 318/2000001 [05:27<237:23:32,  2.34it/s]

buffer size = 1036, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 319/2000001 [05:28<235:55:51,  2.35it/s]

buffer size = 1038, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 320/2000001 [05:28<237:37:12,  2.34it/s]

buffer size = 1040, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 321/2000001 [05:29<233:04:33,  2.38it/s]

buffer size = 1042, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 322/2000001 [05:29<233:06:29,  2.38it/s]

buffer size = 1044, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 323/2000001 [05:30<233:22:55,  2.38it/s]

buffer size = 1046, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 324/2000001 [05:30<234:26:56,  2.37it/s]

buffer size = 1048, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 325/2000001 [05:30<235:51:02,  2.36it/s]

buffer size = 1050, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 326/2000001 [05:31<239:32:04,  2.32it/s]

buffer size = 1052, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 327/2000001 [05:32<298:10:23,  1.86it/s]

buffer size = 1054, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 328/2000001 [05:32<340:46:57,  1.63it/s]

buffer size = 1056, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 329/2000001 [05:33<371:11:25,  1.50it/s]

buffer size = 1058, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 330/2000001 [05:34<384:59:45,  1.44it/s]

buffer size = 1060, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 331/2000001 [05:34<339:32:02,  1.64it/s]

buffer size = 1062, epsilon = 0.09984
mean_reward :  0.0


  0%|          | 332/2000001 [05:35<309:38:12,  1.79it/s]

buffer size = 1064, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 333/2000001 [05:35<288:26:33,  1.93it/s]

buffer size = 1066, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 334/2000001 [05:36<271:46:15,  2.04it/s]

buffer size = 1068, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 335/2000001 [05:36<262:21:20,  2.12it/s]

buffer size = 1070, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 336/2000001 [05:37<254:07:46,  2.19it/s]

buffer size = 1072, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 337/2000001 [05:37<248:17:32,  2.24it/s]

buffer size = 1074, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 338/2000001 [05:37<243:34:34,  2.28it/s]

buffer size = 1076, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 339/2000001 [05:38<242:36:25,  2.29it/s]

buffer size = 1078, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 340/2000001 [05:38<240:38:40,  2.31it/s]

buffer size = 1080, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 341/2000001 [05:39<239:59:25,  2.31it/s]

buffer size = 1082, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 342/2000001 [05:39<239:03:58,  2.32it/s]

buffer size = 1084, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 343/2000001 [05:40<236:03:00,  2.35it/s]

buffer size = 1086, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 344/2000001 [05:40<236:55:27,  2.34it/s]

buffer size = 1088, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 345/2000001 [05:40<236:09:38,  2.35it/s]

buffer size = 1090, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 346/2000001 [05:41<237:48:37,  2.34it/s]

buffer size = 1092, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 347/2000001 [05:41<234:07:15,  2.37it/s]

buffer size = 1094, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 348/2000001 [05:42<233:15:30,  2.38it/s]

buffer size = 1096, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 349/2000001 [05:42<233:56:46,  2.37it/s]

buffer size = 1098, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 350/2000001 [05:42<236:58:52,  2.34it/s]

buffer size = 1100, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 351/2000001 [05:43<238:03:35,  2.33it/s]

buffer size = 1102, epsilon = 0.09983
mean_reward :  0.0


  0%|          | 352/2000001 [05:43<238:09:19,  2.33it/s]

buffer size = 1104, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 353/2000001 [05:44<238:01:18,  2.33it/s]

buffer size = 1106, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 354/2000001 [05:44<274:20:46,  2.02it/s]

buffer size = 1108, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 355/2000001 [05:45<324:48:15,  1.71it/s]

buffer size = 1110, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 356/2000001 [05:46<360:05:02,  1.54it/s]

buffer size = 1112, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 357/2000001 [05:47<383:45:28,  1.45it/s]

buffer size = 1114, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 358/2000001 [05:47<354:29:27,  1.57it/s]

buffer size = 1116, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 359/2000001 [05:48<317:31:32,  1.75it/s]

buffer size = 1118, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 360/2000001 [05:48<294:34:18,  1.89it/s]

buffer size = 1120, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 361/2000001 [05:49<275:47:07,  2.01it/s]

buffer size = 1122, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 362/2000001 [05:49<264:33:46,  2.10it/s]

buffer size = 1124, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 363/2000001 [05:49<256:18:51,  2.17it/s]

buffer size = 1126, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 364/2000001 [05:50<249:03:13,  2.23it/s]

buffer size = 1128, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 365/2000001 [05:50<246:37:46,  2.25it/s]

buffer size = 1130, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 366/2000001 [05:51<242:24:50,  2.29it/s]

buffer size = 1132, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 367/2000001 [05:51<242:39:36,  2.29it/s]

buffer size = 1134, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 368/2000001 [05:52<238:09:19,  2.33it/s]

buffer size = 1136, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 369/2000001 [05:52<233:35:12,  2.38it/s]

buffer size = 1138, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 370/2000001 [05:52<232:08:29,  2.39it/s]

buffer size = 1140, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 371/2000001 [05:53<230:57:38,  2.40it/s]

buffer size = 1142, epsilon = 0.09982
mean_reward :  0.0


  0%|          | 372/2000001 [05:53<230:57:35,  2.40it/s]

buffer size = 1144, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 373/2000001 [05:54<228:31:14,  2.43it/s]

buffer size = 1146, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 374/2000001 [05:54<227:06:59,  2.45it/s]

buffer size = 1148, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 375/2000001 [05:54<229:36:33,  2.42it/s]

buffer size = 1150, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 376/2000001 [05:55<229:24:01,  2.42it/s]

buffer size = 1152, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 377/2000001 [05:55<232:55:01,  2.38it/s]

buffer size = 1154, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 378/2000001 [05:56<232:08:30,  2.39it/s]

buffer size = 1156, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 379/2000001 [05:56<232:23:16,  2.39it/s]

buffer size = 1158, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 380/2000001 [05:57<233:10:39,  2.38it/s]

buffer size = 1160, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 381/2000001 [05:57<233:00:04,  2.38it/s]

buffer size = 1162, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 382/2000001 [05:58<277:10:00,  2.00it/s]

buffer size = 1164, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 383/2000001 [05:58<324:11:10,  1.71it/s]

buffer size = 1166, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 384/2000001 [05:59<361:21:40,  1.54it/s]

buffer size = 1168, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 385/2000001 [06:00<384:02:57,  1.45it/s]

buffer size = 1170, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 386/2000001 [06:01<351:49:07,  1.58it/s]

buffer size = 1172, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 387/2000001 [06:01<316:49:50,  1.75it/s]

buffer size = 1174, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 388/2000001 [06:01<292:44:19,  1.90it/s]

buffer size = 1176, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 389/2000001 [06:02<274:37:29,  2.02it/s]

buffer size = 1178, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 390/2000001 [06:02<262:32:03,  2.12it/s]

buffer size = 1180, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 391/2000001 [06:03<254:56:21,  2.18it/s]

buffer size = 1182, epsilon = 0.09981
mean_reward :  0.0


  0%|          | 392/2000001 [06:03<247:24:50,  2.25it/s]

buffer size = 1184, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 393/2000001 [06:03<244:59:32,  2.27it/s]

buffer size = 1186, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 394/2000001 [06:04<242:32:39,  2.29it/s]

buffer size = 1188, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 395/2000001 [06:04<241:54:25,  2.30it/s]

buffer size = 1190, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 396/2000001 [06:05<237:57:35,  2.33it/s]

buffer size = 1192, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 397/2000001 [06:05<235:41:19,  2.36it/s]

buffer size = 1194, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 398/2000001 [06:06<234:35:47,  2.37it/s]

buffer size = 1196, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 399/2000001 [06:06<237:39:20,  2.34it/s]

buffer size = 1198, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 400/2000001 [06:06<237:16:16,  2.34it/s]

buffer size = 1200, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 401/2000001 [06:07<235:36:43,  2.36it/s]

buffer size = 1202, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 402/2000001 [06:07<233:25:26,  2.38it/s]

buffer size = 1204, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 403/2000001 [06:08<234:46:29,  2.37it/s]

buffer size = 1206, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 404/2000001 [06:08<234:53:49,  2.36it/s]

buffer size = 1208, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 405/2000001 [06:09<237:20:30,  2.34it/s]

buffer size = 1210, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 406/2000001 [06:09<236:38:28,  2.35it/s]

buffer size = 1212, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 407/2000001 [06:09<235:53:26,  2.35it/s]

buffer size = 1214, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 408/2000001 [06:10<238:29:27,  2.33it/s]

buffer size = 1216, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 409/2000001 [06:10<245:39:23,  2.26it/s]

buffer size = 1218, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 410/2000001 [06:11<304:29:59,  1.82it/s]

buffer size = 1220, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 411/2000001 [06:12<341:52:32,  1.62it/s]

buffer size = 1222, epsilon = 0.09980
mean_reward :  0.0


  0%|          | 412/2000001 [06:13<375:19:07,  1.48it/s]

buffer size = 1224, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 413/2000001 [06:13<379:53:35,  1.46it/s]

buffer size = 1226, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 414/2000001 [06:14<337:44:16,  1.64it/s]

buffer size = 1228, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 415/2000001 [06:14<307:59:33,  1.80it/s]

buffer size = 1230, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 416/2000001 [06:15<283:43:22,  1.96it/s]

buffer size = 1232, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 417/2000001 [06:15<270:48:16,  2.05it/s]

buffer size = 1234, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 418/2000001 [06:16<257:48:03,  2.15it/s]

buffer size = 1236, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 419/2000001 [06:16<253:14:05,  2.19it/s]

buffer size = 1238, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 420/2000001 [06:16<246:54:57,  2.25it/s]

buffer size = 1240, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 421/2000001 [06:17<243:01:48,  2.29it/s]

buffer size = 1242, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 422/2000001 [06:17<240:14:18,  2.31it/s]

buffer size = 1244, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 423/2000001 [06:18<239:35:23,  2.32it/s]

buffer size = 1246, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 424/2000001 [06:18<238:25:59,  2.33it/s]

buffer size = 1248, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 425/2000001 [06:18<234:34:46,  2.37it/s]

buffer size = 1250, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 426/2000001 [06:19<237:32:30,  2.34it/s]

buffer size = 1252, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 427/2000001 [06:19<239:41:40,  2.32it/s]

buffer size = 1254, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 428/2000001 [06:20<238:15:36,  2.33it/s]

buffer size = 1256, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 429/2000001 [06:20<237:43:24,  2.34it/s]

buffer size = 1258, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 430/2000001 [06:21<234:44:46,  2.37it/s]

buffer size = 1260, epsilon = 0.09979
mean_reward :  0.0


  0%|          | 431/2000001 [06:21<236:13:06,  2.35it/s]

buffer size = 1262, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 432/2000001 [06:21<233:09:31,  2.38it/s]

buffer size = 1264, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 433/2000001 [06:22<232:18:39,  2.39it/s]

buffer size = 1266, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 434/2000001 [06:22<234:38:50,  2.37it/s]

buffer size = 1268, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 435/2000001 [06:23<234:28:52,  2.37it/s]

buffer size = 1270, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 436/2000001 [06:23<236:34:30,  2.35it/s]

buffer size = 1272, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 437/2000001 [06:24<270:14:08,  2.06it/s]

buffer size = 1274, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 438/2000001 [06:25<321:48:23,  1.73it/s]

buffer size = 1276, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 439/2000001 [06:25<359:53:23,  1.54it/s]

buffer size = 1278, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 440/2000001 [06:26<383:40:27,  1.45it/s]

buffer size = 1280, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 441/2000001 [06:27<357:56:50,  1.55it/s]

buffer size = 1282, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 442/2000001 [06:27<324:37:44,  1.71it/s]

buffer size = 1284, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 443/2000001 [06:28<298:44:43,  1.86it/s]

buffer size = 1286, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 444/2000001 [06:28<279:06:19,  1.99it/s]

buffer size = 1288, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 445/2000001 [06:28<267:28:12,  2.08it/s]

buffer size = 1290, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 446/2000001 [06:29<258:17:59,  2.15it/s]

buffer size = 1292, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 447/2000001 [06:29<251:39:46,  2.21it/s]

buffer size = 1294, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 448/2000001 [06:30<242:51:59,  2.29it/s]

buffer size = 1296, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 449/2000001 [06:30<241:27:44,  2.30it/s]

buffer size = 1298, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 450/2000001 [06:31<239:08:17,  2.32it/s]

buffer size = 1300, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 451/2000001 [06:31<237:59:02,  2.33it/s]

buffer size = 1302, epsilon = 0.09978
mean_reward :  0.0


  0%|          | 452/2000001 [06:31<235:55:43,  2.35it/s]

buffer size = 1304, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 453/2000001 [06:32<235:15:19,  2.36it/s]

buffer size = 1306, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 454/2000001 [06:32<234:58:18,  2.36it/s]

buffer size = 1308, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 455/2000001 [06:33<233:12:48,  2.38it/s]

buffer size = 1310, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 456/2000001 [06:33<232:24:36,  2.39it/s]

buffer size = 1312, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 457/2000001 [06:33<233:23:43,  2.38it/s]

buffer size = 1314, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 458/2000001 [06:34<232:01:15,  2.39it/s]

buffer size = 1316, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 459/2000001 [06:34<232:19:08,  2.39it/s]

buffer size = 1318, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 460/2000001 [06:35<231:57:26,  2.39it/s]

buffer size = 1320, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 461/2000001 [06:35<232:47:31,  2.39it/s]

buffer size = 1322, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 462/2000001 [06:36<235:24:12,  2.36it/s]

buffer size = 1324, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 463/2000001 [06:36<235:06:25,  2.36it/s]

buffer size = 1326, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 464/2000001 [06:36<237:52:14,  2.33it/s]

buffer size = 1328, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 465/2000001 [06:37<294:18:47,  1.89it/s]

buffer size = 1330, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 466/2000001 [06:38<338:11:35,  1.64it/s]

buffer size = 1332, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 467/2000001 [06:39<373:10:07,  1.49it/s]

buffer size = 1334, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 468/2000001 [06:40<388:35:47,  1.43it/s]

buffer size = 1336, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 469/2000001 [06:40<341:30:54,  1.63it/s]

buffer size = 1338, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 470/2000001 [06:40<308:52:51,  1.80it/s]

buffer size = 1340, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 471/2000001 [06:41<288:37:13,  1.92it/s]

buffer size = 1342, epsilon = 0.09977
mean_reward :  0.0


  0%|          | 472/2000001 [06:41<271:45:33,  2.04it/s]

buffer size = 1344, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 473/2000001 [06:42<261:25:36,  2.12it/s]

buffer size = 1346, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 474/2000001 [06:42<254:23:06,  2.18it/s]

buffer size = 1348, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 475/2000001 [06:43<247:45:04,  2.24it/s]

buffer size = 1350, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 476/2000001 [06:43<252:14:20,  2.20it/s]

buffer size = 1352, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 477/2000001 [06:43<247:29:02,  2.24it/s]

buffer size = 1354, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 478/2000001 [06:44<245:35:43,  2.26it/s]

buffer size = 1356, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 479/2000001 [06:44<241:58:46,  2.30it/s]

buffer size = 1358, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 480/2000001 [06:45<240:35:29,  2.31it/s]

buffer size = 1360, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 481/2000001 [06:45<242:45:47,  2.29it/s]

buffer size = 1362, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 482/2000001 [06:46<240:44:07,  2.31it/s]

buffer size = 1364, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 483/2000001 [06:46<238:47:06,  2.33it/s]

buffer size = 1366, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 484/2000001 [06:46<237:08:57,  2.34it/s]

buffer size = 1368, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 485/2000001 [06:47<236:40:52,  2.35it/s]

buffer size = 1370, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 486/2000001 [06:47<237:57:39,  2.33it/s]

buffer size = 1372, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 487/2000001 [06:48<236:22:02,  2.35it/s]

buffer size = 1374, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 488/2000001 [06:48<238:38:18,  2.33it/s]

buffer size = 1376, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 489/2000001 [06:49<238:12:05,  2.33it/s]

buffer size = 1378, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 490/2000001 [06:49<241:56:19,  2.30it/s]

buffer size = 1380, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 491/2000001 [06:49<240:21:51,  2.31it/s]

buffer size = 1382, epsilon = 0.09976
mean_reward :  0.0


  0%|          | 492/2000001 [06:50<282:21:18,  1.97it/s]

buffer size = 1384, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 493/2000001 [06:51<330:37:32,  1.68it/s]

buffer size = 1386, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 494/2000001 [06:52<367:49:17,  1.51it/s]

buffer size = 1388, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 495/2000001 [06:53<391:31:25,  1.42it/s]

buffer size = 1390, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 496/2000001 [06:53<354:15:58,  1.57it/s]

buffer size = 1392, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 497/2000001 [06:53<319:06:01,  1.74it/s]

buffer size = 1394, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 498/2000001 [06:54<295:06:46,  1.88it/s]

buffer size = 1396, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 499/2000001 [06:54<281:52:23,  1.97it/s]

buffer size = 1398, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 500/2000001 [06:55<268:20:24,  2.07it/s]

buffer size = 1400, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 501/2000001 [06:55<262:50:06,  2.11it/s]

buffer size = 1402, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 502/2000001 [06:56<253:32:35,  2.19it/s]

buffer size = 1404, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 503/2000001 [06:56<247:12:50,  2.25it/s]

buffer size = 1406, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 504/2000001 [06:57<246:19:42,  2.25it/s]

buffer size = 1408, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 505/2000001 [06:57<242:26:27,  2.29it/s]

buffer size = 1410, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 506/2000001 [06:57<243:39:28,  2.28it/s]

buffer size = 1412, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 507/2000001 [06:58<240:58:48,  2.30it/s]

buffer size = 1414, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 508/2000001 [06:58<242:57:05,  2.29it/s]

buffer size = 1416, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 509/2000001 [06:59<241:47:12,  2.30it/s]

buffer size = 1418, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 510/2000001 [06:59<239:12:30,  2.32it/s]

buffer size = 1420, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 511/2000001 [07:00<239:35:54,  2.32it/s]

buffer size = 1422, epsilon = 0.09975
mean_reward :  0.0


  0%|          | 512/2000001 [07:00<239:47:29,  2.32it/s]

buffer size = 1424, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 513/2000001 [07:00<240:26:18,  2.31it/s]

buffer size = 1426, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 514/2000001 [07:01<240:32:03,  2.31it/s]

buffer size = 1428, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 515/2000001 [07:01<239:56:26,  2.31it/s]

buffer size = 1430, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 516/2000001 [07:02<240:18:20,  2.31it/s]

buffer size = 1432, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 517/2000001 [07:02<242:43:59,  2.29it/s]

buffer size = 1434, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 518/2000001 [07:03<240:07:57,  2.31it/s]

buffer size = 1436, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 519/2000001 [07:03<280:01:35,  1.98it/s]

buffer size = 1438, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 520/2000001 [07:04<332:48:50,  1.67it/s]

buffer size = 1440, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 521/2000001 [07:05<370:28:29,  1.50it/s]

buffer size = 1442, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 522/2000001 [07:06<390:33:55,  1.42it/s]

buffer size = 1444, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 523/2000001 [07:06<361:33:39,  1.54it/s]

buffer size = 1446, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 524/2000001 [07:07<327:02:13,  1.70it/s]

buffer size = 1448, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 525/2000001 [07:07<300:25:43,  1.85it/s]

buffer size = 1450, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 526/2000001 [07:07<280:19:27,  1.98it/s]

buffer size = 1452, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 527/2000001 [07:08<269:40:58,  2.06it/s]

buffer size = 1454, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 528/2000001 [07:08<256:19:37,  2.17it/s]

buffer size = 1456, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 529/2000001 [07:09<250:37:16,  2.22it/s]

buffer size = 1458, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 530/2000001 [07:09<248:15:12,  2.24it/s]

buffer size = 1460, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 531/2000001 [07:10<243:48:44,  2.28it/s]

buffer size = 1462, epsilon = 0.09974
mean_reward :  0.0


  0%|          | 532/2000001 [07:10<245:26:25,  2.26it/s]

buffer size = 1464, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 533/2000001 [07:10<242:38:14,  2.29it/s]

buffer size = 1466, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 534/2000001 [07:11<244:25:32,  2.27it/s]

buffer size = 1468, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 535/2000001 [07:11<242:00:01,  2.30it/s]

buffer size = 1470, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 536/2000001 [07:12<240:57:02,  2.31it/s]

buffer size = 1472, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 537/2000001 [07:12<238:32:28,  2.33it/s]

buffer size = 1474, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 538/2000001 [07:13<236:49:50,  2.35it/s]

buffer size = 1476, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 539/2000001 [07:13<239:13:58,  2.32it/s]

buffer size = 1478, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 540/2000001 [07:14<242:13:28,  2.29it/s]

buffer size = 1480, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 541/2000001 [07:14<243:57:26,  2.28it/s]

buffer size = 1482, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 542/2000001 [07:14<242:25:26,  2.29it/s]

buffer size = 1484, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 543/2000001 [07:15<243:58:45,  2.28it/s]

buffer size = 1486, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 544/2000001 [07:15<243:17:20,  2.28it/s]

buffer size = 1488, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 545/2000001 [07:16<240:08:55,  2.31it/s]

buffer size = 1490, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 546/2000001 [07:16<269:30:31,  2.06it/s]

buffer size = 1492, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 547/2000001 [07:17<323:32:24,  1.72it/s]

buffer size = 1494, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 548/2000001 [07:18<355:11:17,  1.56it/s]

buffer size = 1496, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 549/2000001 [07:19<380:49:13,  1.46it/s]

buffer size = 1498, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 550/2000001 [07:19<373:51:47,  1.49it/s]

buffer size = 1500, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 551/2000001 [07:20<333:58:43,  1.66it/s]

buffer size = 1502, epsilon = 0.09973
mean_reward :  0.0


  0%|          | 552/2000001 [07:20<305:36:15,  1.82it/s]

buffer size = 1504, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 553/2000001 [07:21<285:14:31,  1.95it/s]

buffer size = 1506, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 554/2000001 [07:21<272:55:19,  2.04it/s]

buffer size = 1508, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 555/2000001 [07:21<263:43:14,  2.11it/s]

buffer size = 1510, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 556/2000001 [07:22<254:18:37,  2.18it/s]

buffer size = 1512, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 557/2000001 [07:22<250:34:40,  2.22it/s]

buffer size = 1514, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 558/2000001 [07:23<246:26:11,  2.25it/s]

buffer size = 1516, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 559/2000001 [07:23<245:12:59,  2.26it/s]

buffer size = 1518, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 560/2000001 [07:24<241:11:04,  2.30it/s]

buffer size = 1520, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 561/2000001 [07:24<241:37:58,  2.30it/s]

buffer size = 1522, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 562/2000001 [07:25<243:27:02,  2.28it/s]

buffer size = 1524, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 563/2000001 [07:25<242:25:41,  2.29it/s]

buffer size = 1526, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 564/2000001 [07:25<242:55:53,  2.29it/s]

buffer size = 1528, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 565/2000001 [07:26<240:47:40,  2.31it/s]

buffer size = 1530, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 566/2000001 [07:26<239:45:06,  2.32it/s]

buffer size = 1532, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 567/2000001 [07:27<240:32:06,  2.31it/s]

buffer size = 1534, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 568/2000001 [07:27<239:20:05,  2.32it/s]

buffer size = 1536, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 569/2000001 [07:28<239:52:41,  2.32it/s]

buffer size = 1538, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 570/2000001 [07:28<238:07:26,  2.33it/s]

buffer size = 1540, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 571/2000001 [07:28<239:56:20,  2.31it/s]

buffer size = 1542, epsilon = 0.09972
mean_reward :  0.0


  0%|          | 572/2000001 [07:29<239:57:31,  2.31it/s]

buffer size = 1544, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 573/2000001 [07:29<248:02:08,  2.24it/s]

buffer size = 1546, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 574/2000001 [07:30<305:15:14,  1.82it/s]

buffer size = 1548, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 575/2000001 [07:31<347:22:11,  1.60it/s]

buffer size = 1550, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 576/2000001 [07:32<371:01:49,  1.50it/s]

buffer size = 1552, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 577/2000001 [07:32<387:11:29,  1.43it/s]

buffer size = 1554, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 578/2000001 [07:33<345:19:47,  1.61it/s]

buffer size = 1556, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 579/2000001 [07:33<313:45:31,  1.77it/s]

buffer size = 1558, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 580/2000001 [07:34<296:29:08,  1.87it/s]

buffer size = 1560, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 581/2000001 [07:34<277:03:50,  2.00it/s]

buffer size = 1562, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 582/2000001 [07:35<265:03:38,  2.10it/s]

buffer size = 1564, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 583/2000001 [07:35<259:30:25,  2.14it/s]

buffer size = 1566, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 584/2000001 [07:35<251:57:08,  2.20it/s]

buffer size = 1568, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 585/2000001 [07:36<249:23:17,  2.23it/s]

buffer size = 1570, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 586/2000001 [07:36<247:33:24,  2.24it/s]

buffer size = 1572, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 587/2000001 [07:37<246:19:37,  2.25it/s]

buffer size = 1574, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 588/2000001 [07:37<245:05:45,  2.27it/s]

buffer size = 1576, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 589/2000001 [07:38<243:17:03,  2.28it/s]

buffer size = 1578, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 590/2000001 [07:38<242:49:10,  2.29it/s]

buffer size = 1580, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 591/2000001 [07:39<239:39:34,  2.32it/s]

buffer size = 1582, epsilon = 0.09971
mean_reward :  0.0


  0%|          | 592/2000001 [07:39<242:00:01,  2.30it/s]

buffer size = 1584, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 593/2000001 [07:39<241:27:50,  2.30it/s]

buffer size = 1586, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 594/2000001 [07:40<241:45:08,  2.30it/s]

buffer size = 1588, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 595/2000001 [07:40<239:49:52,  2.32it/s]

buffer size = 1590, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 596/2000001 [07:41<241:48:33,  2.30it/s]

buffer size = 1592, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 597/2000001 [07:41<241:59:17,  2.30it/s]

buffer size = 1594, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 598/2000001 [07:42<238:53:32,  2.32it/s]

buffer size = 1596, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 599/2000001 [07:42<241:22:58,  2.30it/s]

buffer size = 1598, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 600/2000001 [07:42<241:27:08,  2.30it/s]

buffer size = 1600, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 601/2000001 [07:43<297:56:33,  1.86it/s]

buffer size = 1602, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 602/2000001 [07:44<344:14:54,  1.61it/s]

buffer size = 1604, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 603/2000001 [07:45<375:55:21,  1.48it/s]

buffer size = 1606, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 604/2000001 [07:46<392:48:01,  1.41it/s]

buffer size = 1608, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 605/2000001 [07:46<353:56:53,  1.57it/s]

buffer size = 1610, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 606/2000001 [07:47<317:42:51,  1.75it/s]

buffer size = 1612, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 607/2000001 [07:47<295:50:20,  1.88it/s]

buffer size = 1614, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 608/2000001 [07:47<279:52:39,  1.98it/s]

buffer size = 1616, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 609/2000001 [07:48<265:30:23,  2.09it/s]

buffer size = 1618, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 610/2000001 [07:48<258:40:40,  2.15it/s]

buffer size = 1620, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 611/2000001 [07:49<251:29:14,  2.21it/s]

buffer size = 1622, epsilon = 0.09970
mean_reward :  0.0


  0%|          | 612/2000001 [07:49<252:07:04,  2.20it/s]

buffer size = 1624, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 613/2000001 [07:50<251:05:09,  2.21it/s]

buffer size = 1626, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 614/2000001 [07:50<246:53:24,  2.25it/s]

buffer size = 1628, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 615/2000001 [07:50<243:40:52,  2.28it/s]

buffer size = 1630, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 616/2000001 [07:51<239:24:34,  2.32it/s]

buffer size = 1632, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 617/2000001 [07:51<239:45:40,  2.32it/s]

buffer size = 1634, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 618/2000001 [07:52<237:41:12,  2.34it/s]

buffer size = 1636, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 619/2000001 [07:52<237:01:57,  2.34it/s]

buffer size = 1638, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 620/2000001 [07:53<235:09:11,  2.36it/s]

buffer size = 1640, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 621/2000001 [07:53<235:50:24,  2.35it/s]

buffer size = 1642, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 622/2000001 [07:53<236:44:33,  2.35it/s]

buffer size = 1644, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 623/2000001 [07:54<239:25:44,  2.32it/s]

buffer size = 1646, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 624/2000001 [07:54<239:22:03,  2.32it/s]

buffer size = 1648, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 625/2000001 [07:55<242:52:50,  2.29it/s]

buffer size = 1650, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 626/2000001 [07:55<243:43:24,  2.28it/s]

buffer size = 1652, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 627/2000001 [07:56<242:01:08,  2.29it/s]

buffer size = 1654, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 628/2000001 [07:56<277:18:36,  2.00it/s]

buffer size = 1656, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 629/2000001 [07:57<325:21:44,  1.71it/s]

buffer size = 1658, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 630/2000001 [07:58<362:57:42,  1.53it/s]

buffer size = 1660, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 631/2000001 [07:59<387:24:50,  1.43it/s]

buffer size = 1662, epsilon = 0.09969
mean_reward :  0.0


  0%|          | 632/2000001 [07:59<363:40:32,  1.53it/s]

buffer size = 1664, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 633/2000001 [08:00<328:37:57,  1.69it/s]

buffer size = 1666, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 634/2000001 [08:00<304:05:51,  1.83it/s]

buffer size = 1668, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 635/2000001 [08:00<284:35:06,  1.95it/s]

buffer size = 1670, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 636/2000001 [08:01<272:36:41,  2.04it/s]

buffer size = 1672, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 637/2000001 [08:01<261:31:06,  2.12it/s]

buffer size = 1674, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 638/2000001 [08:02<255:23:28,  2.17it/s]

buffer size = 1676, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 639/2000001 [08:02<252:42:29,  2.20it/s]

buffer size = 1678, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 640/2000001 [08:03<249:21:33,  2.23it/s]

buffer size = 1680, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 641/2000001 [08:03<248:23:38,  2.24it/s]

buffer size = 1682, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 642/2000001 [08:04<244:11:12,  2.27it/s]

buffer size = 1684, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 643/2000001 [08:04<245:09:17,  2.27it/s]

buffer size = 1686, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 644/2000001 [08:04<243:04:11,  2.28it/s]

buffer size = 1688, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 645/2000001 [08:05<241:02:40,  2.30it/s]

buffer size = 1690, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 646/2000001 [08:05<240:38:21,  2.31it/s]

buffer size = 1692, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 647/2000001 [08:06<239:31:42,  2.32it/s]

buffer size = 1694, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 648/2000001 [08:06<245:01:29,  2.27it/s]

buffer size = 1696, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 649/2000001 [08:07<242:53:31,  2.29it/s]

buffer size = 1698, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 650/2000001 [08:07<246:01:58,  2.26it/s]

buffer size = 1700, epsilon = 0.09968
mean_reward :  0.0


  0%|          | 651/2000001 [08:07<245:23:04,  2.26it/s]

buffer size = 1702, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 652/2000001 [08:08<244:36:20,  2.27it/s]

buffer size = 1704, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 653/2000001 [08:08<244:25:25,  2.27it/s]

buffer size = 1706, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 654/2000001 [08:09<245:32:11,  2.26it/s]

buffer size = 1708, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 655/2000001 [08:10<290:58:52,  1.91it/s]

buffer size = 1710, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 656/2000001 [08:10<342:42:56,  1.62it/s]

buffer size = 1712, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 657/2000001 [08:11<375:28:26,  1.48it/s]

buffer size = 1714, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 658/2000001 [08:12<395:23:14,  1.40it/s]

buffer size = 1716, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 659/2000001 [08:12<359:22:56,  1.55it/s]

buffer size = 1718, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 660/2000001 [08:13<322:34:07,  1.72it/s]

buffer size = 1720, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 661/2000001 [08:13<297:53:48,  1.86it/s]

buffer size = 1722, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 662/2000001 [08:14<285:20:41,  1.95it/s]

buffer size = 1724, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 663/2000001 [08:14<272:52:08,  2.04it/s]

buffer size = 1726, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 664/2000001 [08:15<263:54:16,  2.10it/s]

buffer size = 1728, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 665/2000001 [08:15<260:30:00,  2.13it/s]

buffer size = 1730, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 666/2000001 [08:16<254:26:54,  2.18it/s]

buffer size = 1732, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 667/2000001 [08:16<249:42:14,  2.22it/s]

buffer size = 1734, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 668/2000001 [08:16<249:15:53,  2.23it/s]

buffer size = 1736, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 669/2000001 [08:17<248:18:01,  2.24it/s]

buffer size = 1738, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 670/2000001 [08:17<248:49:24,  2.23it/s]

buffer size = 1740, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 671/2000001 [08:18<245:47:56,  2.26it/s]

buffer size = 1742, epsilon = 0.09967
mean_reward :  0.0


  0%|          | 672/2000001 [08:18<245:05:52,  2.27it/s]

buffer size = 1744, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 673/2000001 [08:19<245:01:50,  2.27it/s]

buffer size = 1746, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 674/2000001 [08:19<243:59:35,  2.28it/s]

buffer size = 1748, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 675/2000001 [08:20<252:02:42,  2.20it/s]

buffer size = 1750, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 676/2000001 [08:20<248:59:14,  2.23it/s]

buffer size = 1752, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 677/2000001 [08:20<245:46:25,  2.26it/s]

buffer size = 1754, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 678/2000001 [08:21<245:37:02,  2.26it/s]

buffer size = 1756, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 679/2000001 [08:21<246:09:30,  2.26it/s]

buffer size = 1758, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 680/2000001 [08:22<246:22:18,  2.25it/s]

buffer size = 1760, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 681/2000001 [08:22<251:39:44,  2.21it/s]

buffer size = 1762, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 682/2000001 [08:23<307:34:09,  1.81it/s]

buffer size = 1764, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 683/2000001 [08:24<348:18:34,  1.59it/s]

buffer size = 1766, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 684/2000001 [08:25<380:58:24,  1.46it/s]

buffer size = 1768, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 685/2000001 [08:25<393:42:27,  1.41it/s]

buffer size = 1770, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 686/2000001 [08:26<353:24:04,  1.57it/s]

buffer size = 1772, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 687/2000001 [08:26<320:14:38,  1.73it/s]

buffer size = 1774, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 688/2000001 [08:27<304:56:45,  1.82it/s]

buffer size = 1776, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 689/2000001 [08:27<290:52:28,  1.91it/s]

buffer size = 1778, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 690/2000001 [08:28<281:23:44,  1.97it/s]

buffer size = 1780, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 691/2000001 [08:28<272:03:56,  2.04it/s]

buffer size = 1782, epsilon = 0.09966
mean_reward :  0.0


  0%|          | 692/2000001 [08:29<267:43:46,  2.07it/s]

buffer size = 1784, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 693/2000001 [08:29<268:03:12,  2.07it/s]

buffer size = 1786, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 694/2000001 [08:30<263:51:40,  2.10it/s]

buffer size = 1788, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 695/2000001 [08:30<263:32:32,  2.11it/s]

buffer size = 1790, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 696/2000001 [08:30<259:46:52,  2.14it/s]

buffer size = 1792, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 697/2000001 [08:31<258:45:11,  2.15it/s]

buffer size = 1794, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 698/2000001 [08:31<256:18:25,  2.17it/s]

buffer size = 1796, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 699/2000001 [08:32<256:13:07,  2.17it/s]

buffer size = 1798, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 700/2000001 [08:32<256:00:33,  2.17it/s]

buffer size = 1800, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 701/2000001 [08:33<262:53:39,  2.11it/s]

buffer size = 1802, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 702/2000001 [08:33<261:42:51,  2.12it/s]

buffer size = 1804, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 703/2000001 [08:34<257:16:24,  2.16it/s]

buffer size = 1806, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 704/2000001 [08:34<255:02:39,  2.18it/s]

buffer size = 1808, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 705/2000001 [08:35<254:14:26,  2.18it/s]

buffer size = 1810, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 706/2000001 [08:35<252:55:46,  2.20it/s]

buffer size = 1812, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 707/2000001 [08:36<252:57:43,  2.20it/s]

buffer size = 1814, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 708/2000001 [08:36<312:43:23,  1.78it/s]

buffer size = 1816, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 709/2000001 [08:37<354:35:35,  1.57it/s]

buffer size = 1818, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 710/2000001 [08:38<383:10:04,  1.45it/s]

buffer size = 1820, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 711/2000001 [08:39<400:56:37,  1.39it/s]

buffer size = 1822, epsilon = 0.09965
mean_reward :  0.0


  0%|          | 712/2000001 [08:39<357:08:55,  1.55it/s]

buffer size = 1824, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 713/2000001 [08:40<321:35:24,  1.73it/s]

buffer size = 1826, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 714/2000001 [08:40<295:33:26,  1.88it/s]

buffer size = 1828, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 715/2000001 [08:41<280:11:39,  1.98it/s]

buffer size = 1830, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 716/2000001 [08:41<268:25:09,  2.07it/s]

buffer size = 1832, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 717/2000001 [08:41<260:30:15,  2.13it/s]

buffer size = 1834, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 718/2000001 [08:42<254:26:26,  2.18it/s]

buffer size = 1836, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 719/2000001 [08:42<250:50:19,  2.21it/s]

buffer size = 1838, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 720/2000001 [08:43<249:57:46,  2.22it/s]

buffer size = 1840, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 721/2000001 [08:43<248:03:04,  2.24it/s]

buffer size = 1842, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 722/2000001 [08:44<245:15:01,  2.26it/s]

buffer size = 1844, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 723/2000001 [08:44<244:40:29,  2.27it/s]

buffer size = 1846, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 724/2000001 [08:44<244:21:18,  2.27it/s]

buffer size = 1848, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 725/2000001 [08:45<244:10:33,  2.27it/s]

buffer size = 1850, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 726/2000001 [08:45<247:07:47,  2.25it/s]

buffer size = 1852, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 727/2000001 [08:46<244:20:00,  2.27it/s]

buffer size = 1854, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 728/2000001 [08:46<243:08:23,  2.28it/s]

buffer size = 1856, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 729/2000001 [08:47<244:07:14,  2.27it/s]

buffer size = 1858, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 730/2000001 [08:47<244:01:07,  2.28it/s]

buffer size = 1860, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 731/2000001 [08:48<243:22:26,  2.28it/s]

buffer size = 1862, epsilon = 0.09964
mean_reward :  0.0


  0%|          | 732/2000001 [08:48<242:25:02,  2.29it/s]

buffer size = 1864, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 733/2000001 [08:48<241:29:35,  2.30it/s]

buffer size = 1866, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 734/2000001 [08:49<242:09:01,  2.29it/s]

buffer size = 1868, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 735/2000001 [08:50<300:04:06,  1.85it/s]

buffer size = 1870, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 736/2000001 [08:50<341:40:00,  1.63it/s]

buffer size = 1872, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 737/2000001 [08:51<376:00:29,  1.48it/s]

buffer size = 1874, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 738/2000001 [08:52<394:01:20,  1.41it/s]

buffer size = 1876, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 739/2000001 [08:52<346:01:22,  1.60it/s]

buffer size = 1878, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 740/2000001 [08:53<316:50:20,  1.75it/s]

buffer size = 1880, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 741/2000001 [08:53<294:14:37,  1.89it/s]

buffer size = 1882, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 742/2000001 [08:54<280:02:24,  1.98it/s]

buffer size = 1884, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 743/2000001 [08:54<269:01:17,  2.06it/s]

buffer size = 1886, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 744/2000001 [08:55<262:32:46,  2.12it/s]

buffer size = 1888, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 745/2000001 [08:55<256:35:59,  2.16it/s]

buffer size = 1890, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 746/2000001 [08:56<250:09:41,  2.22it/s]

buffer size = 1892, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 747/2000001 [08:56<248:40:55,  2.23it/s]

buffer size = 1894, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 748/2000001 [08:56<245:51:51,  2.26it/s]

buffer size = 1896, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 749/2000001 [08:57<245:26:01,  2.26it/s]

buffer size = 1898, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 750/2000001 [08:57<244:54:40,  2.27it/s]

buffer size = 1900, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 751/2000001 [08:58<243:15:59,  2.28it/s]

buffer size = 1902, epsilon = 0.09963
mean_reward :  0.0


  0%|          | 752/2000001 [08:58<244:40:39,  2.27it/s]

buffer size = 1904, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 753/2000001 [08:59<243:25:39,  2.28it/s]

buffer size = 1906, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 754/2000001 [08:59<244:19:58,  2.27it/s]

buffer size = 1908, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 755/2000001 [08:59<242:34:00,  2.29it/s]

buffer size = 1910, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 756/2000001 [09:00<243:19:33,  2.28it/s]

buffer size = 1912, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 757/2000001 [09:00<243:03:20,  2.28it/s]

buffer size = 1914, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 758/2000001 [09:01<240:41:00,  2.31it/s]

buffer size = 1916, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 759/2000001 [09:01<241:41:11,  2.30it/s]

buffer size = 1918, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 760/2000001 [09:02<241:10:41,  2.30it/s]

buffer size = 1920, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 761/2000001 [09:02<251:48:01,  2.21it/s]

buffer size = 1922, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 762/2000001 [09:03<309:25:30,  1.79it/s]

buffer size = 1924, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 763/2000001 [09:04<345:38:21,  1.61it/s]

buffer size = 1926, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 764/2000001 [09:04<373:04:51,  1.49it/s]

buffer size = 1928, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 765/2000001 [09:05<392:43:32,  1.41it/s]

buffer size = 1930, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 766/2000001 [09:06<348:33:43,  1.59it/s]

buffer size = 1932, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 767/2000001 [09:06<318:50:03,  1.74it/s]

buffer size = 1934, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 768/2000001 [09:07<296:50:27,  1.87it/s]

buffer size = 1936, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 769/2000001 [09:07<282:16:41,  1.97it/s]

buffer size = 1938, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 770/2000001 [09:08<271:30:54,  2.05it/s]

buffer size = 1940, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 771/2000001 [09:08<264:24:26,  2.10it/s]

buffer size = 1942, epsilon = 0.09962
mean_reward :  0.0


  0%|          | 772/2000001 [09:08<257:36:11,  2.16it/s]

buffer size = 1944, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 773/2000001 [09:09<252:08:36,  2.20it/s]

buffer size = 1946, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 774/2000001 [09:09<251:27:27,  2.21it/s]

buffer size = 1948, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 775/2000001 [09:10<250:33:20,  2.22it/s]

buffer size = 1950, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 776/2000001 [09:10<249:46:57,  2.22it/s]

buffer size = 1952, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 777/2000001 [09:11<250:51:23,  2.21it/s]

buffer size = 1954, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 778/2000001 [09:11<248:43:04,  2.23it/s]

buffer size = 1956, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 779/2000001 [09:11<248:35:50,  2.23it/s]

buffer size = 1958, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 780/2000001 [09:12<246:07:28,  2.26it/s]

buffer size = 1960, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 781/2000001 [09:12<247:19:23,  2.25it/s]

buffer size = 1962, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 782/2000001 [09:13<247:17:36,  2.25it/s]

buffer size = 1964, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 783/2000001 [09:13<245:55:49,  2.26it/s]

buffer size = 1966, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 784/2000001 [09:14<249:45:06,  2.22it/s]

buffer size = 1968, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 785/2000001 [09:14<251:41:42,  2.21it/s]

buffer size = 1970, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 786/2000001 [09:15<249:20:52,  2.23it/s]

buffer size = 1972, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 787/2000001 [09:15<250:17:30,  2.22it/s]

buffer size = 1974, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 788/2000001 [09:16<282:29:50,  1.97it/s]

buffer size = 1976, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 789/2000001 [09:17<330:45:49,  1.68it/s]

buffer size = 1978, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 790/2000001 [09:17<362:24:12,  1.53it/s]

buffer size = 1980, epsilon = 0.09961
mean_reward :  0.0


  0%|          | 791/2000001 [09:18<385:12:04,  1.44it/s]

buffer size = 1982, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 792/2000001 [09:19<374:48:42,  1.48it/s]

buffer size = 1984, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 793/2000001 [09:19<338:34:54,  1.64it/s]

buffer size = 1986, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 794/2000001 [09:20<316:28:37,  1.75it/s]

buffer size = 1988, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 795/2000001 [09:20<297:39:28,  1.87it/s]

buffer size = 1990, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 796/2000001 [09:21<280:28:58,  1.98it/s]

buffer size = 1992, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 797/2000001 [09:21<273:09:03,  2.03it/s]

buffer size = 1994, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 798/2000001 [09:21<264:55:41,  2.10it/s]

buffer size = 1996, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 799/2000001 [09:22<262:18:25,  2.12it/s]

buffer size = 1998, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 800/2000001 [09:22<256:06:23,  2.17it/s]

buffer size = 2000, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 801/2000001 [09:23<254:35:32,  2.18it/s]

buffer size = 2002, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 802/2000001 [09:23<251:29:38,  2.21it/s]

buffer size = 2004, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 803/2000001 [09:24<248:29:46,  2.23it/s]

buffer size = 2006, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 804/2000001 [09:24<252:28:58,  2.20it/s]

buffer size = 2008, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 805/2000001 [09:25<248:38:24,  2.23it/s]

buffer size = 2010, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 806/2000001 [09:25<247:52:49,  2.24it/s]

buffer size = 2012, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 807/2000001 [09:25<245:19:03,  2.26it/s]

buffer size = 2014, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 808/2000001 [09:26<247:23:10,  2.24it/s]

buffer size = 2016, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 809/2000001 [09:26<244:12:45,  2.27it/s]

buffer size = 2018, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 810/2000001 [09:27<248:13:14,  2.24it/s]

buffer size = 2020, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 811/2000001 [09:27<256:35:15,  2.16it/s]

buffer size = 2022, epsilon = 0.09960
mean_reward :  0.0


  0%|          | 812/2000001 [09:28<253:36:03,  2.19it/s]

buffer size = 2024, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 813/2000001 [09:28<251:55:32,  2.20it/s]

buffer size = 2026, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 814/2000001 [09:29<258:10:21,  2.15it/s]

buffer size = 2028, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 815/2000001 [09:29<309:59:37,  1.79it/s]

buffer size = 2030, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 816/2000001 [09:30<347:48:59,  1.60it/s]

buffer size = 2032, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 817/2000001 [09:31<375:13:58,  1.48it/s]

buffer size = 2034, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 818/2000001 [09:32<393:17:58,  1.41it/s]

buffer size = 2036, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 819/2000001 [09:32<352:26:02,  1.58it/s]

buffer size = 2038, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 820/2000001 [09:33<318:46:40,  1.74it/s]

buffer size = 2040, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 821/2000001 [09:33<299:14:39,  1.86it/s]

buffer size = 2042, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 822/2000001 [09:34<282:55:03,  1.96it/s]

buffer size = 2044, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 823/2000001 [09:34<272:30:16,  2.04it/s]

buffer size = 2046, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 824/2000001 [09:35<266:42:46,  2.08it/s]

buffer size = 2048, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 825/2000001 [09:35<257:56:05,  2.15it/s]

buffer size = 2050, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 826/2000001 [09:35<252:32:56,  2.20it/s]

buffer size = 2052, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 827/2000001 [09:36<251:23:42,  2.21it/s]

buffer size = 2054, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 828/2000001 [09:36<252:20:38,  2.20it/s]

buffer size = 2056, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 829/2000001 [09:37<253:26:30,  2.19it/s]

buffer size = 2058, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 830/2000001 [09:37<249:04:08,  2.23it/s]

buffer size = 2060, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 831/2000001 [09:38<250:13:04,  2.22it/s]

buffer size = 2062, epsilon = 0.09959
mean_reward :  0.0


  0%|          | 832/2000001 [09:38<248:11:06,  2.24it/s]

buffer size = 2064, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 833/2000001 [09:39<246:36:40,  2.25it/s]

buffer size = 2066, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 834/2000001 [09:39<248:40:40,  2.23it/s]

buffer size = 2068, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 835/2000001 [09:39<249:28:04,  2.23it/s]

buffer size = 2070, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 836/2000001 [09:40<247:41:53,  2.24it/s]

buffer size = 2072, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 837/2000001 [09:40<247:02:43,  2.25it/s]

buffer size = 2074, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 838/2000001 [09:41<245:45:33,  2.26it/s]

buffer size = 2076, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 839/2000001 [09:41<244:33:42,  2.27it/s]

buffer size = 2078, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 840/2000001 [09:42<246:16:03,  2.25it/s]

buffer size = 2080, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 841/2000001 [09:42<276:27:58,  2.01it/s]

buffer size = 2082, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 842/2000001 [09:43<330:54:54,  1.68it/s]

buffer size = 2084, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 843/2000001 [09:44<374:19:31,  1.48it/s]

buffer size = 2086, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 844/2000001 [09:45<393:54:34,  1.41it/s]

buffer size = 2088, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 845/2000001 [09:45<371:46:15,  1.49it/s]

buffer size = 2090, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 846/2000001 [09:46<335:54:33,  1.65it/s]

buffer size = 2092, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 847/2000001 [09:46<307:52:08,  1.80it/s]

buffer size = 2094, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 848/2000001 [09:47<287:24:48,  1.93it/s]

buffer size = 2096, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 849/2000001 [09:47<277:46:17,  2.00it/s]

buffer size = 2098, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 850/2000001 [09:48<266:11:11,  2.09it/s]

buffer size = 2100, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 851/2000001 [09:48<260:38:39,  2.13it/s]

buffer size = 2102, epsilon = 0.09958
mean_reward :  0.0


  0%|          | 852/2000001 [09:48<255:24:00,  2.17it/s]

buffer size = 2104, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 853/2000001 [09:49<253:30:49,  2.19it/s]

buffer size = 2106, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 854/2000001 [09:49<251:58:03,  2.20it/s]

buffer size = 2108, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 855/2000001 [09:50<250:20:12,  2.22it/s]

buffer size = 2110, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 856/2000001 [09:50<257:11:08,  2.16it/s]

buffer size = 2112, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 857/2000001 [09:51<251:50:12,  2.21it/s]

buffer size = 2114, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 858/2000001 [09:51<250:24:02,  2.22it/s]

buffer size = 2116, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 859/2000001 [09:52<248:49:21,  2.23it/s]

buffer size = 2118, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 860/2000001 [09:52<249:38:50,  2.22it/s]

buffer size = 2120, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 861/2000001 [09:52<246:01:01,  2.26it/s]

buffer size = 2122, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 862/2000001 [09:53<243:07:30,  2.28it/s]

buffer size = 2124, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 863/2000001 [09:53<241:53:33,  2.30it/s]

buffer size = 2126, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 864/2000001 [09:54<241:41:19,  2.30it/s]

buffer size = 2128, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 865/2000001 [09:54<243:35:56,  2.28it/s]

buffer size = 2130, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 866/2000001 [09:55<241:55:02,  2.30it/s]

buffer size = 2132, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 867/2000001 [09:55<243:37:18,  2.28it/s]

buffer size = 2134, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 868/2000001 [09:56<299:42:55,  1.85it/s]

buffer size = 2136, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 869/2000001 [09:57<341:01:56,  1.63it/s]

buffer size = 2138, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 870/2000001 [09:57<349:52:06,  1.59it/s]

buffer size = 2140, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 871/2000001 [09:58<318:37:58,  1.74it/s]

buffer size = 2142, epsilon = 0.09957
mean_reward :  0.0


  0%|          | 872/2000001 [09:58<298:57:05,  1.86it/s]

buffer size = 2144, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 873/2000001 [09:59<281:50:25,  1.97it/s]

buffer size = 2146, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 874/2000001 [09:59<269:41:54,  2.06it/s]

buffer size = 2148, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 875/2000001 [09:59<263:01:20,  2.11it/s]

buffer size = 2150, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 876/2000001 [10:00<256:00:27,  2.17it/s]

buffer size = 2152, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 877/2000001 [10:00<254:26:13,  2.18it/s]

buffer size = 2154, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 878/2000001 [10:01<248:24:00,  2.24it/s]

buffer size = 2156, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 879/2000001 [10:01<246:45:46,  2.25it/s]

buffer size = 2158, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 880/2000001 [10:02<244:27:31,  2.27it/s]

buffer size = 2160, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 881/2000001 [10:02<244:46:42,  2.27it/s]

buffer size = 2162, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 882/2000001 [10:03<244:46:22,  2.27it/s]

buffer size = 2164, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 883/2000001 [10:03<243:18:15,  2.28it/s]

buffer size = 2166, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 884/2000001 [10:03<243:55:52,  2.28it/s]

buffer size = 2168, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 885/2000001 [10:04<242:57:13,  2.29it/s]

buffer size = 2170, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 886/2000001 [10:04<247:58:58,  2.24it/s]

buffer size = 2172, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 887/2000001 [10:05<244:40:36,  2.27it/s]

buffer size = 2174, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 888/2000001 [10:05<242:27:45,  2.29it/s]

buffer size = 2176, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 889/2000001 [10:06<243:31:09,  2.28it/s]

buffer size = 2178, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 890/2000001 [10:06<243:17:17,  2.28it/s]

buffer size = 2180, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 891/2000001 [10:06<244:24:07,  2.27it/s]

buffer size = 2182, epsilon = 0.09956
mean_reward :  0.0


  0%|          | 892/2000001 [10:07<242:10:08,  2.29it/s]

buffer size = 2184, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 893/2000001 [10:08<273:20:43,  2.03it/s]

buffer size = 2186, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 894/2000001 [10:08<321:17:08,  1.73it/s]

buffer size = 2188, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 895/2000001 [10:09<357:13:29,  1.55it/s]

buffer size = 2190, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 896/2000001 [10:10<329:57:35,  1.68it/s]

buffer size = 2192, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 897/2000001 [10:10<301:04:55,  1.84it/s]

buffer size = 2194, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 898/2000001 [10:10<284:57:10,  1.95it/s]

buffer size = 2196, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 899/2000001 [10:11<271:24:47,  2.05it/s]

buffer size = 2198, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 900/2000001 [10:11<261:20:10,  2.12it/s]

buffer size = 2200, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 901/2000001 [10:12<259:06:04,  2.14it/s]

buffer size = 2202, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 902/2000001 [10:12<252:46:58,  2.20it/s]

buffer size = 2204, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 903/2000001 [10:13<253:08:01,  2.19it/s]

buffer size = 2206, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 904/2000001 [10:13<250:35:20,  2.22it/s]

buffer size = 2208, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 905/2000001 [10:14<246:39:48,  2.25it/s]

buffer size = 2210, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 906/2000001 [10:14<248:01:01,  2.24it/s]

buffer size = 2212, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 907/2000001 [10:14<250:11:21,  2.22it/s]

buffer size = 2214, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 908/2000001 [10:15<248:56:34,  2.23it/s]

buffer size = 2216, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 909/2000001 [10:15<245:33:36,  2.26it/s]

buffer size = 2218, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 910/2000001 [10:16<246:55:06,  2.25it/s]

buffer size = 2220, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 911/2000001 [10:16<244:57:32,  2.27it/s]

buffer size = 2222, epsilon = 0.09955
mean_reward :  0.0


  0%|          | 912/2000001 [10:17<243:18:30,  2.28it/s]

buffer size = 2224, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 913/2000001 [10:17<244:56:48,  2.27it/s]

buffer size = 2226, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 914/2000001 [10:18<243:04:17,  2.28it/s]

buffer size = 2228, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 915/2000001 [10:18<243:28:23,  2.28it/s]

buffer size = 2230, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 916/2000001 [10:18<242:35:35,  2.29it/s]

buffer size = 2232, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 917/2000001 [10:19<242:15:40,  2.29it/s]

buffer size = 2234, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 918/2000001 [10:19<245:58:05,  2.26it/s]

buffer size = 2236, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 919/2000001 [10:20<309:14:14,  1.80it/s]

buffer size = 2238, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 920/2000001 [10:21<346:05:18,  1.60it/s]

buffer size = 2240, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 921/2000001 [10:22<351:06:36,  1.58it/s]

buffer size = 2242, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 922/2000001 [10:22<320:26:46,  1.73it/s]

buffer size = 2244, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 923/2000001 [10:22<296:17:43,  1.87it/s]

buffer size = 2246, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 924/2000001 [10:23<278:07:03,  2.00it/s]

buffer size = 2248, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 925/2000001 [10:23<270:46:50,  2.05it/s]

buffer size = 2250, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 926/2000001 [10:24<261:45:55,  2.12it/s]

buffer size = 2252, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 927/2000001 [10:24<259:11:03,  2.14it/s]

buffer size = 2254, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 928/2000001 [10:25<253:49:22,  2.19it/s]

buffer size = 2256, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 929/2000001 [10:25<251:50:18,  2.20it/s]

buffer size = 2258, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 930/2000001 [10:25<248:37:05,  2.23it/s]

buffer size = 2260, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 931/2000001 [10:26<246:45:29,  2.25it/s]

buffer size = 2262, epsilon = 0.09954
mean_reward :  0.0


  0%|          | 932/2000001 [10:26<245:23:08,  2.26it/s]

buffer size = 2264, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 933/2000001 [10:27<244:02:07,  2.28it/s]

buffer size = 2266, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 934/2000001 [10:27<244:12:42,  2.27it/s]

buffer size = 2268, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 935/2000001 [10:28<242:53:41,  2.29it/s]

buffer size = 2270, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 936/2000001 [10:28<245:41:09,  2.26it/s]

buffer size = 2272, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 937/2000001 [10:29<245:10:19,  2.26it/s]

buffer size = 2274, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 938/2000001 [10:29<245:35:24,  2.26it/s]

buffer size = 2276, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 939/2000001 [10:29<245:52:27,  2.26it/s]

buffer size = 2278, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 940/2000001 [10:30<244:36:35,  2.27it/s]

buffer size = 2280, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 941/2000001 [10:30<245:37:13,  2.26it/s]

buffer size = 2282, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 942/2000001 [10:31<244:14:31,  2.27it/s]

buffer size = 2284, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 943/2000001 [10:31<245:14:46,  2.26it/s]

buffer size = 2286, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 944/2000001 [10:32<276:46:41,  2.01it/s]

buffer size = 2288, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 945/2000001 [10:33<326:54:09,  1.70it/s]

buffer size = 2290, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 946/2000001 [10:33<361:58:24,  1.53it/s]

buffer size = 2292, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 947/2000001 [10:34<328:08:23,  1.69it/s]

buffer size = 2294, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 948/2000001 [10:34<305:40:06,  1.82it/s]

buffer size = 2296, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 949/2000001 [10:35<291:30:09,  1.90it/s]

buffer size = 2298, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 950/2000001 [10:35<277:59:02,  2.00it/s]

buffer size = 2300, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 951/2000001 [10:36<270:28:45,  2.05it/s]

buffer size = 2302, epsilon = 0.09953
mean_reward :  0.0


  0%|          | 952/2000001 [10:36<263:00:51,  2.11it/s]

buffer size = 2304, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 953/2000001 [10:37<259:42:53,  2.14it/s]

buffer size = 2306, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 954/2000001 [10:37<258:00:09,  2.15it/s]

buffer size = 2308, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 955/2000001 [10:38<256:12:48,  2.17it/s]

buffer size = 2310, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 956/2000001 [10:38<253:59:57,  2.19it/s]

buffer size = 2312, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 957/2000001 [10:38<250:46:34,  2.21it/s]

buffer size = 2314, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 958/2000001 [10:39<250:42:54,  2.21it/s]

buffer size = 2316, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 959/2000001 [10:39<248:30:25,  2.23it/s]

buffer size = 2318, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 960/2000001 [10:40<248:17:28,  2.24it/s]

buffer size = 2320, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 961/2000001 [10:40<248:01:02,  2.24it/s]

buffer size = 2322, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 962/2000001 [10:41<248:11:26,  2.24it/s]

buffer size = 2324, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 963/2000001 [10:41<248:29:17,  2.23it/s]

buffer size = 2326, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 964/2000001 [10:42<245:08:14,  2.27it/s]

buffer size = 2328, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 965/2000001 [10:42<245:33:42,  2.26it/s]

buffer size = 2330, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 966/2000001 [10:42<246:47:57,  2.25it/s]

buffer size = 2332, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 967/2000001 [10:43<245:36:19,  2.26it/s]

buffer size = 2334, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 968/2000001 [10:43<245:46:18,  2.26it/s]

buffer size = 2336, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 969/2000001 [10:44<277:42:00,  2.00it/s]

buffer size = 2338, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 970/2000001 [10:45<326:11:24,  1.70it/s]

buffer size = 2340, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 971/2000001 [10:45<359:58:38,  1.54it/s]

buffer size = 2342, epsilon = 0.09952
mean_reward :  0.0


  0%|          | 972/2000001 [10:46<337:02:44,  1.65it/s]

buffer size = 2344, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 973/2000001 [10:46<308:22:02,  1.80it/s]

buffer size = 2346, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 974/2000001 [10:47<289:08:34,  1.92it/s]

buffer size = 2348, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 975/2000001 [10:47<275:51:25,  2.01it/s]

buffer size = 2350, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 976/2000001 [10:48<265:19:58,  2.09it/s]

buffer size = 2352, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 977/2000001 [10:48<259:42:01,  2.14it/s]

buffer size = 2354, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 978/2000001 [10:49<256:41:48,  2.16it/s]

buffer size = 2356, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 979/2000001 [10:49<255:50:13,  2.17it/s]

buffer size = 2358, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 980/2000001 [10:50<253:06:23,  2.19it/s]

buffer size = 2360, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 981/2000001 [10:50<250:24:18,  2.22it/s]

buffer size = 2362, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 982/2000001 [10:50<246:51:00,  2.25it/s]

buffer size = 2364, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 983/2000001 [10:51<247:36:22,  2.24it/s]

buffer size = 2366, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 984/2000001 [10:51<247:56:57,  2.24it/s]

buffer size = 2368, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 985/2000001 [10:52<247:46:55,  2.24it/s]

buffer size = 2370, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 986/2000001 [10:52<246:31:09,  2.25it/s]

buffer size = 2372, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 987/2000001 [10:53<246:53:31,  2.25it/s]

buffer size = 2374, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 988/2000001 [10:53<247:41:10,  2.24it/s]

buffer size = 2376, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 989/2000001 [10:54<244:58:06,  2.27it/s]

buffer size = 2378, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 990/2000001 [10:54<245:35:58,  2.26it/s]

buffer size = 2380, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 991/2000001 [10:54<248:11:48,  2.24it/s]

buffer size = 2382, epsilon = 0.09951
mean_reward :  0.0


  0%|          | 992/2000001 [10:55<247:27:52,  2.24it/s]

buffer size = 2384, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 993/2000001 [10:55<248:02:09,  2.24it/s]

buffer size = 2386, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 994/2000001 [10:56<256:34:52,  2.16it/s]

buffer size = 2388, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 995/2000001 [10:57<312:48:12,  1.78it/s]

buffer size = 2390, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 996/2000001 [10:57<352:41:16,  1.57it/s]

buffer size = 2392, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 997/2000001 [10:58<343:05:40,  1.62it/s]

buffer size = 2394, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 998/2000001 [10:58<314:37:44,  1.76it/s]

buffer size = 2396, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 999/2000001 [10:59<295:19:49,  1.88it/s]

buffer size = 2398, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1000/2000001 [10:59<280:26:13,  1.98it/s]

buffer size = 2400, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1001/2000001 [11:00<269:38:41,  2.06it/s]

buffer size = 2402, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1002/2000001 [11:00<262:50:01,  2.11it/s]

buffer size = 2404, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1003/2000001 [11:01<256:40:47,  2.16it/s]

buffer size = 2406, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1004/2000001 [11:01<252:42:24,  2.20it/s]

buffer size = 2408, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1005/2000001 [11:02<250:05:28,  2.22it/s]

buffer size = 2410, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1006/2000001 [11:02<248:00:43,  2.24it/s]

buffer size = 2412, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1007/2000001 [11:02<249:10:59,  2.23it/s]

buffer size = 2414, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1008/2000001 [11:03<246:17:57,  2.25it/s]

buffer size = 2416, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1009/2000001 [11:03<247:01:43,  2.25it/s]

buffer size = 2418, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1010/2000001 [11:04<246:23:45,  2.25it/s]

buffer size = 2420, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1011/2000001 [11:04<244:49:33,  2.27it/s]

buffer size = 2422, epsilon = 0.09950
mean_reward :  0.0


  0%|          | 1012/2000001 [11:05<248:55:29,  2.23it/s]

buffer size = 2424, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1013/2000001 [11:05<245:42:20,  2.26it/s]

buffer size = 2426, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1014/2000001 [11:06<245:34:04,  2.26it/s]

buffer size = 2428, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1015/2000001 [11:06<246:19:41,  2.25it/s]

buffer size = 2430, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1016/2000001 [11:06<251:30:30,  2.21it/s]

buffer size = 2432, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1017/2000001 [11:07<249:37:10,  2.22it/s]

buffer size = 2434, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1018/2000001 [11:07<249:28:00,  2.23it/s]

buffer size = 2436, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1019/2000001 [11:08<251:06:23,  2.21it/s]

buffer size = 2438, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1020/2000001 [11:09<302:56:58,  1.83it/s]

buffer size = 2440, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1021/2000001 [11:09<346:56:40,  1.60it/s]

buffer size = 2442, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1022/2000001 [11:10<356:10:29,  1.56it/s]

buffer size = 2444, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1023/2000001 [11:10<321:05:12,  1.73it/s]

buffer size = 2446, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1024/2000001 [11:11<300:33:37,  1.85it/s]

buffer size = 2448, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1025/2000001 [11:11<282:27:20,  1.97it/s]

buffer size = 2450, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1026/2000001 [11:12<272:25:29,  2.04it/s]

buffer size = 2452, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1027/2000001 [11:12<263:40:25,  2.11it/s]

buffer size = 2454, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1028/2000001 [11:13<260:40:33,  2.13it/s]

buffer size = 2456, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1029/2000001 [11:13<253:42:18,  2.19it/s]

buffer size = 2458, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1030/2000001 [11:14<250:48:05,  2.21it/s]

buffer size = 2460, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1031/2000001 [11:14<251:15:48,  2.21it/s]

buffer size = 2462, epsilon = 0.09949
mean_reward :  0.0


  0%|          | 1032/2000001 [11:14<249:26:52,  2.23it/s]

buffer size = 2464, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1033/2000001 [11:15<249:24:15,  2.23it/s]

buffer size = 2466, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1034/2000001 [11:15<247:49:25,  2.24it/s]

buffer size = 2468, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1035/2000001 [11:16<247:40:57,  2.24it/s]

buffer size = 2470, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1036/2000001 [11:16<244:29:41,  2.27it/s]

buffer size = 2472, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1037/2000001 [11:17<243:42:00,  2.28it/s]

buffer size = 2474, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1038/2000001 [11:17<245:31:28,  2.26it/s]

buffer size = 2476, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1039/2000001 [11:18<244:45:56,  2.27it/s]

buffer size = 2478, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1040/2000001 [11:18<245:36:19,  2.26it/s]

buffer size = 2480, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1041/2000001 [11:18<247:05:11,  2.25it/s]

buffer size = 2482, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1042/2000001 [11:19<246:13:09,  2.26it/s]

buffer size = 2484, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1043/2000001 [11:19<247:50:37,  2.24it/s]

buffer size = 2486, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1044/2000001 [11:20<247:17:44,  2.25it/s]

buffer size = 2488, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1045/2000001 [11:21<292:14:16,  1.90it/s]

buffer size = 2490, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1046/2000001 [11:21<337:36:48,  1.64it/s]

buffer size = 2492, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1047/2000001 [11:22<361:29:35,  1.54it/s]

buffer size = 2494, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1048/2000001 [11:22<325:03:48,  1.71it/s]

buffer size = 2496, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1049/2000001 [11:23<301:06:11,  1.84it/s]

buffer size = 2498, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1050/2000001 [11:23<285:47:30,  1.94it/s]

buffer size = 2500, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1051/2000001 [11:24<272:20:15,  2.04it/s]

buffer size = 2502, epsilon = 0.09948
mean_reward :  0.0


  0%|          | 1052/2000001 [11:24<266:11:50,  2.09it/s]

buffer size = 2504, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1053/2000001 [11:25<258:29:23,  2.15it/s]

buffer size = 2506, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1054/2000001 [11:25<256:54:58,  2.16it/s]

buffer size = 2508, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1055/2000001 [11:26<253:01:19,  2.19it/s]

buffer size = 2510, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1056/2000001 [11:26<249:05:53,  2.23it/s]

buffer size = 2512, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1057/2000001 [11:26<247:35:05,  2.24it/s]

buffer size = 2514, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1058/2000001 [11:27<245:58:31,  2.26it/s]

buffer size = 2516, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1059/2000001 [11:27<248:12:59,  2.24it/s]

buffer size = 2518, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1060/2000001 [11:28<245:31:46,  2.26it/s]

buffer size = 2520, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1061/2000001 [11:28<246:17:26,  2.25it/s]

buffer size = 2522, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1062/2000001 [11:29<247:33:42,  2.24it/s]

buffer size = 2524, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1063/2000001 [11:29<245:53:54,  2.26it/s]

buffer size = 2526, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1064/2000001 [11:30<247:02:36,  2.25it/s]

buffer size = 2528, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1065/2000001 [11:30<246:44:03,  2.25it/s]

buffer size = 2530, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1066/2000001 [11:30<246:47:10,  2.25it/s]

buffer size = 2532, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1067/2000001 [11:31<245:54:44,  2.26it/s]

buffer size = 2534, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1068/2000001 [11:31<245:39:29,  2.26it/s]

buffer size = 2536, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1069/2000001 [11:32<245:04:07,  2.27it/s]

buffer size = 2538, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1070/2000001 [11:32<277:43:36,  2.00it/s]

buffer size = 2540, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1071/2000001 [11:33<326:41:12,  1.70it/s]

buffer size = 2542, epsilon = 0.09947
mean_reward :  0.0


  0%|          | 1072/2000001 [11:34<358:16:11,  1.55it/s]

buffer size = 2544, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1073/2000001 [11:34<334:02:48,  1.66it/s]

buffer size = 2546, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1074/2000001 [11:35<306:51:59,  1.81it/s]

buffer size = 2548, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1075/2000001 [11:35<285:58:53,  1.94it/s]

buffer size = 2550, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1076/2000001 [11:36<275:45:30,  2.01it/s]

buffer size = 2552, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1077/2000001 [11:36<266:23:14,  2.08it/s]

buffer size = 2554, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1078/2000001 [11:37<262:55:22,  2.11it/s]

buffer size = 2556, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1079/2000001 [11:37<258:23:33,  2.15it/s]

buffer size = 2558, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1080/2000001 [11:38<256:01:14,  2.17it/s]

buffer size = 2560, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1081/2000001 [11:38<255:33:03,  2.17it/s]

buffer size = 2562, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1082/2000001 [11:38<250:36:33,  2.22it/s]

buffer size = 2564, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1083/2000001 [11:39<251:35:57,  2.21it/s]

buffer size = 2566, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1084/2000001 [11:39<248:43:51,  2.23it/s]

buffer size = 2568, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1085/2000001 [11:40<249:11:07,  2.23it/s]

buffer size = 2570, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1086/2000001 [11:40<247:25:41,  2.24it/s]

buffer size = 2572, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1087/2000001 [11:41<247:52:45,  2.24it/s]

buffer size = 2574, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1088/2000001 [11:41<247:32:17,  2.24it/s]

buffer size = 2576, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1089/2000001 [11:42<248:13:21,  2.24it/s]

buffer size = 2578, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1090/2000001 [11:42<248:12:12,  2.24it/s]

buffer size = 2580, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1091/2000001 [11:43<246:16:51,  2.25it/s]

buffer size = 2582, epsilon = 0.09946
mean_reward :  0.0


  0%|          | 1092/2000001 [11:43<247:21:13,  2.24it/s]

buffer size = 2584, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1093/2000001 [11:43<247:43:57,  2.24it/s]

buffer size = 2586, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1094/2000001 [11:44<248:30:48,  2.23it/s]

buffer size = 2588, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1095/2000001 [11:44<277:46:37,  2.00it/s]

buffer size = 2590, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1096/2000001 [11:45<328:15:33,  1.69it/s]

buffer size = 2592, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1097/2000001 [11:46<358:59:38,  1.55it/s]

buffer size = 2594, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1098/2000001 [11:47<335:11:59,  1.66it/s]

buffer size = 2596, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1099/2000001 [11:47<310:35:48,  1.79it/s]

buffer size = 2598, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1100/2000001 [11:47<290:43:45,  1.91it/s]

buffer size = 2600, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1101/2000001 [11:48<278:22:29,  1.99it/s]

buffer size = 2602, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1102/2000001 [11:48<268:21:29,  2.07it/s]

buffer size = 2604, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1103/2000001 [11:49<261:53:57,  2.12it/s]

buffer size = 2606, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1104/2000001 [11:49<261:04:59,  2.13it/s]

buffer size = 2608, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1105/2000001 [11:50<255:42:29,  2.17it/s]

buffer size = 2610, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1106/2000001 [11:50<253:11:26,  2.19it/s]

buffer size = 2612, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1107/2000001 [11:51<248:19:59,  2.24it/s]

buffer size = 2614, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1108/2000001 [11:51<249:31:18,  2.23it/s]

buffer size = 2616, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1109/2000001 [11:51<249:15:19,  2.23it/s]

buffer size = 2618, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1110/2000001 [11:52<245:52:51,  2.26it/s]

buffer size = 2620, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1111/2000001 [11:52<247:36:09,  2.24it/s]

buffer size = 2622, epsilon = 0.09945
mean_reward :  0.0


  0%|          | 1112/2000001 [11:53<244:03:35,  2.28it/s]

buffer size = 2624, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1113/2000001 [11:53<247:17:25,  2.25it/s]

buffer size = 2626, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1114/2000001 [11:54<246:41:31,  2.25it/s]

buffer size = 2628, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1115/2000001 [11:54<248:35:06,  2.23it/s]

buffer size = 2630, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1116/2000001 [11:55<248:39:28,  2.23it/s]

buffer size = 2632, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1117/2000001 [11:55<246:36:49,  2.25it/s]

buffer size = 2634, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1118/2000001 [11:55<248:22:57,  2.24it/s]

buffer size = 2636, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1119/2000001 [11:56<247:20:46,  2.24it/s]

buffer size = 2638, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1120/2000001 [11:56<262:30:02,  2.12it/s]

buffer size = 2640, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1121/2000001 [11:57<318:26:51,  1.74it/s]

buffer size = 2642, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1122/2000001 [11:58<350:52:30,  1.58it/s]

buffer size = 2644, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1123/2000001 [11:59<340:11:25,  1.63it/s]

buffer size = 2646, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1124/2000001 [11:59<310:33:32,  1.79it/s]

buffer size = 2648, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1125/2000001 [11:59<293:20:05,  1.89it/s]

buffer size = 2650, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1126/2000001 [12:00<279:52:01,  1.98it/s]

buffer size = 2652, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1127/2000001 [12:00<271:34:47,  2.04it/s]

buffer size = 2654, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1128/2000001 [12:01<263:34:47,  2.11it/s]

buffer size = 2656, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1129/2000001 [12:01<259:26:56,  2.14it/s]

buffer size = 2658, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1130/2000001 [12:02<254:50:21,  2.18it/s]

buffer size = 2660, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1131/2000001 [12:02<256:06:51,  2.17it/s]

buffer size = 2662, epsilon = 0.09944
mean_reward :  0.0


  0%|          | 1132/2000001 [12:03<254:40:04,  2.18it/s]

buffer size = 2664, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1133/2000001 [12:03<252:45:53,  2.20it/s]

buffer size = 2666, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1134/2000001 [12:04<251:55:19,  2.20it/s]

buffer size = 2668, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1135/2000001 [12:04<248:29:50,  2.23it/s]

buffer size = 2670, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1136/2000001 [12:04<252:13:24,  2.20it/s]

buffer size = 2672, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1137/2000001 [12:05<249:12:59,  2.23it/s]

buffer size = 2674, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1138/2000001 [12:05<247:17:53,  2.25it/s]

buffer size = 2676, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1139/2000001 [12:06<248:23:15,  2.24it/s]

buffer size = 2678, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1140/2000001 [12:06<248:33:34,  2.23it/s]

buffer size = 2680, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1141/2000001 [12:07<251:00:46,  2.21it/s]

buffer size = 2682, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1142/2000001 [12:07<249:07:08,  2.23it/s]

buffer size = 2684, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1143/2000001 [12:08<250:07:31,  2.22it/s]

buffer size = 2686, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1144/2000001 [12:08<247:48:19,  2.24it/s]

buffer size = 2688, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1145/2000001 [12:09<257:55:44,  2.15it/s]

buffer size = 2690, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1146/2000001 [12:09<313:34:11,  1.77it/s]

buffer size = 2692, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1147/2000001 [12:10<354:31:13,  1.57it/s]

buffer size = 2694, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1148/2000001 [12:11<347:26:51,  1.60it/s]

buffer size = 2696, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1149/2000001 [12:11<316:21:12,  1.76it/s]

buffer size = 2698, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1150/2000001 [12:12<294:46:40,  1.88it/s]

buffer size = 2700, epsilon = 0.09943
mean_reward :  0.0


  0%|          | 1151/2000001 [12:12<282:36:17,  1.96it/s]

buffer size = 2702, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1152/2000001 [12:12<271:20:59,  2.05it/s]

buffer size = 2704, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1153/2000001 [12:13<264:53:47,  2.10it/s]

buffer size = 2706, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1154/2000001 [12:13<259:52:03,  2.14it/s]

buffer size = 2708, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1155/2000001 [12:14<257:36:53,  2.16it/s]

buffer size = 2710, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1156/2000001 [12:14<252:38:52,  2.20it/s]

buffer size = 2712, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1157/2000001 [12:15<252:39:58,  2.20it/s]

buffer size = 2714, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1158/2000001 [12:15<252:37:50,  2.20it/s]

buffer size = 2716, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1159/2000001 [12:16<249:39:28,  2.22it/s]

buffer size = 2718, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1160/2000001 [12:16<249:08:22,  2.23it/s]

buffer size = 2720, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1161/2000001 [12:17<247:17:35,  2.25it/s]

buffer size = 2722, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1162/2000001 [12:17<248:20:07,  2.24it/s]

buffer size = 2724, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1163/2000001 [12:17<247:21:24,  2.24it/s]

buffer size = 2726, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1164/2000001 [12:18<244:36:40,  2.27it/s]

buffer size = 2728, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1165/2000001 [12:18<246:26:46,  2.25it/s]

buffer size = 2730, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1166/2000001 [12:19<245:08:41,  2.26it/s]

buffer size = 2732, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1167/2000001 [12:19<247:03:28,  2.25it/s]

buffer size = 2734, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1168/2000001 [12:20<250:03:11,  2.22it/s]

buffer size = 2736, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1169/2000001 [12:20<252:05:17,  2.20it/s]

buffer size = 2738, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1170/2000001 [12:21<253:12:47,  2.19it/s]

buffer size = 2740, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1171/2000001 [12:21<310:54:23,  1.79it/s]

buffer size = 2742, epsilon = 0.09942
mean_reward :  0.0


  0%|          | 1172/2000001 [12:22<345:22:48,  1.61it/s]

buffer size = 2744, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1173/2000001 [12:23<350:47:03,  1.58it/s]

buffer size = 2746, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1174/2000001 [12:23<321:25:32,  1.73it/s]

buffer size = 2748, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1175/2000001 [12:24<297:45:45,  1.86it/s]

buffer size = 2750, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1176/2000001 [12:24<283:16:40,  1.96it/s]

buffer size = 2752, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1177/2000001 [12:25<276:02:21,  2.01it/s]

buffer size = 2754, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1178/2000001 [12:25<264:14:04,  2.10it/s]

buffer size = 2756, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1179/2000001 [12:25<260:29:48,  2.13it/s]

buffer size = 2758, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1180/2000001 [12:26<254:30:55,  2.18it/s]

buffer size = 2760, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1181/2000001 [12:26<252:11:53,  2.20it/s]

buffer size = 2762, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1182/2000001 [12:27<250:00:07,  2.22it/s]

buffer size = 2764, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1183/2000001 [12:27<250:08:06,  2.22it/s]

buffer size = 2766, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1184/2000001 [12:28<249:05:18,  2.23it/s]

buffer size = 2768, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1185/2000001 [12:28<250:24:19,  2.22it/s]

buffer size = 2770, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1186/2000001 [12:29<249:48:24,  2.22it/s]

buffer size = 2772, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1187/2000001 [12:29<248:26:57,  2.23it/s]

buffer size = 2774, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1188/2000001 [12:29<247:17:59,  2.25it/s]

buffer size = 2776, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1189/2000001 [12:30<246:29:39,  2.25it/s]

buffer size = 2778, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1190/2000001 [12:30<246:14:32,  2.25it/s]

buffer size = 2780, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1191/2000001 [12:31<248:16:32,  2.24it/s]

buffer size = 2782, epsilon = 0.09941
mean_reward :  0.0


  0%|          | 1192/2000001 [12:31<247:13:44,  2.25it/s]

buffer size = 2784, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1193/2000001 [12:32<247:06:34,  2.25it/s]

buffer size = 2786, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1194/2000001 [12:32<247:55:29,  2.24it/s]

buffer size = 2788, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1195/2000001 [12:33<248:40:08,  2.23it/s]

buffer size = 2790, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1196/2000001 [12:33<301:40:17,  1.84it/s]

buffer size = 2792, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1197/2000001 [12:34<344:47:58,  1.61it/s]

buffer size = 2794, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1198/2000001 [12:35<357:17:50,  1.55it/s]

buffer size = 2796, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1199/2000001 [12:35<324:02:14,  1.71it/s]

buffer size = 2798, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1200/2000001 [12:36<301:25:11,  1.84it/s]

buffer size = 2800, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1201/2000001 [12:36<287:33:49,  1.93it/s]

buffer size = 2802, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1202/2000001 [12:37<277:03:07,  2.00it/s]

buffer size = 2804, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1203/2000001 [12:37<268:14:06,  2.07it/s]

buffer size = 2806, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1204/2000001 [12:38<260:24:15,  2.13it/s]

buffer size = 2808, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1205/2000001 [12:38<257:24:50,  2.16it/s]

buffer size = 2810, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1206/2000001 [12:38<253:09:06,  2.19it/s]

buffer size = 2812, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1207/2000001 [12:39<254:21:20,  2.18it/s]

buffer size = 2814, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1208/2000001 [12:39<254:49:33,  2.18it/s]

buffer size = 2816, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1209/2000001 [12:40<258:18:34,  2.15it/s]

buffer size = 2818, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1210/2000001 [12:40<255:40:49,  2.17it/s]

buffer size = 2820, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1211/2000001 [12:41<253:50:39,  2.19it/s]

buffer size = 2822, epsilon = 0.09940
mean_reward :  0.0


  0%|          | 1212/2000001 [12:41<253:57:21,  2.19it/s]

buffer size = 2824, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1213/2000001 [12:42<250:28:14,  2.22it/s]

buffer size = 2826, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1214/2000001 [12:42<251:10:03,  2.21it/s]

buffer size = 2828, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1215/2000001 [12:43<248:23:33,  2.24it/s]

buffer size = 2830, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1216/2000001 [12:43<248:32:17,  2.23it/s]

buffer size = 2832, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1217/2000001 [12:43<249:31:42,  2.23it/s]

buffer size = 2834, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1218/2000001 [12:44<250:36:39,  2.22it/s]

buffer size = 2836, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1219/2000001 [12:44<250:03:46,  2.22it/s]

buffer size = 2838, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1220/2000001 [12:45<259:17:57,  2.14it/s]

buffer size = 2840, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1221/2000001 [12:46<315:51:46,  1.76it/s]

buffer size = 2842, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1222/2000001 [12:46<354:02:48,  1.57it/s]

buffer size = 2844, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1223/2000001 [12:47<352:06:23,  1.58it/s]

buffer size = 2846, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1224/2000001 [12:48<319:39:29,  1.74it/s]

buffer size = 2848, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1225/2000001 [12:48<297:57:52,  1.86it/s]

buffer size = 2850, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1226/2000001 [12:48<283:31:02,  1.96it/s]

buffer size = 2852, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1227/2000001 [12:49<270:50:03,  2.05it/s]

buffer size = 2854, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1228/2000001 [12:49<266:42:34,  2.08it/s]

buffer size = 2856, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1229/2000001 [12:50<261:26:54,  2.12it/s]

buffer size = 2858, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1230/2000001 [12:50<258:32:32,  2.15it/s]

buffer size = 2860, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1231/2000001 [12:51<262:21:52,  2.12it/s]

buffer size = 2862, epsilon = 0.09939
mean_reward :  0.0


  0%|          | 1232/2000001 [12:51<283:52:42,  1.96it/s]

buffer size = 2864, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1233/2000001 [12:52<286:49:35,  1.94it/s]

buffer size = 2866, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1234/2000001 [12:52<292:52:04,  1.90it/s]

buffer size = 2868, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1235/2000001 [12:53<293:16:16,  1.89it/s]

buffer size = 2870, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1236/2000001 [12:53<299:09:04,  1.86it/s]

buffer size = 2872, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1237/2000001 [12:54<297:16:30,  1.87it/s]

buffer size = 2874, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1238/2000001 [12:55<302:22:52,  1.84it/s]

buffer size = 2876, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1239/2000001 [12:55<301:36:52,  1.84it/s]

buffer size = 2878, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1240/2000001 [12:56<300:27:28,  1.85it/s]

buffer size = 2880, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1241/2000001 [12:56<298:49:04,  1.86it/s]

buffer size = 2882, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1242/2000001 [12:57<301:49:18,  1.84it/s]

buffer size = 2884, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1243/2000001 [12:58<385:27:26,  1.44it/s]

buffer size = 2886, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1244/2000001 [12:59<432:46:50,  1.28it/s]

buffer size = 2888, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1245/2000001 [13:00<464:59:27,  1.19it/s]

buffer size = 2890, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1246/2000001 [13:01<472:32:58,  1.17it/s]

buffer size = 2892, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1247/2000001 [13:01<430:19:37,  1.29it/s]

buffer size = 2894, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1248/2000001 [13:02<374:36:56,  1.48it/s]

buffer size = 2896, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1249/2000001 [13:02<339:02:42,  1.64it/s]

buffer size = 2898, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1250/2000001 [13:03<311:14:18,  1.78it/s]

buffer size = 2900, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1251/2000001 [13:03<293:28:01,  1.89it/s]

buffer size = 2902, epsilon = 0.09938
mean_reward :  0.0


  0%|          | 1252/2000001 [13:03<279:34:38,  1.99it/s]

buffer size = 2904, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1253/2000001 [13:04<272:17:13,  2.04it/s]

buffer size = 2906, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1254/2000001 [13:04<269:40:46,  2.06it/s]

buffer size = 2908, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1255/2000001 [13:05<264:26:35,  2.10it/s]

buffer size = 2910, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1256/2000001 [13:05<257:50:44,  2.15it/s]

buffer size = 2912, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1257/2000001 [13:06<253:08:22,  2.19it/s]

buffer size = 2914, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1258/2000001 [13:06<254:07:05,  2.18it/s]

buffer size = 2916, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1259/2000001 [13:07<251:38:45,  2.21it/s]

buffer size = 2918, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1260/2000001 [13:07<252:32:51,  2.20it/s]

buffer size = 2920, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1261/2000001 [13:08<249:46:04,  2.22it/s]

buffer size = 2922, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1262/2000001 [13:08<250:42:27,  2.21it/s]

buffer size = 2924, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1263/2000001 [13:08<249:34:29,  2.22it/s]

buffer size = 2926, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1264/2000001 [13:09<248:19:55,  2.24it/s]

buffer size = 2928, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1265/2000001 [13:09<251:52:18,  2.20it/s]

buffer size = 2930, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1266/2000001 [13:10<252:15:35,  2.20it/s]

buffer size = 2932, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1267/2000001 [13:10<252:28:32,  2.20it/s]

buffer size = 2934, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1268/2000001 [13:11<251:08:18,  2.21it/s]

buffer size = 2936, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1269/2000001 [13:11<268:06:24,  2.07it/s]

buffer size = 2938, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1270/2000001 [13:12<313:43:20,  1.77it/s]

buffer size = 2940, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1271/2000001 [13:13<350:22:20,  1.58it/s]

buffer size = 2942, epsilon = 0.09937
mean_reward :  0.0


  0%|          | 1272/2000001 [13:13<350:31:15,  1.58it/s]

buffer size = 2944, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1273/2000001 [13:14<319:08:24,  1.74it/s]

buffer size = 2946, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1274/2000001 [13:14<300:00:33,  1.85it/s]

buffer size = 2948, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1275/2000001 [13:15<287:32:00,  1.93it/s]

buffer size = 2950, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1276/2000001 [13:15<280:30:10,  1.98it/s]

buffer size = 2952, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1277/2000001 [13:16<268:55:42,  2.06it/s]

buffer size = 2954, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1278/2000001 [13:16<260:47:01,  2.13it/s]

buffer size = 2956, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1279/2000001 [13:17<257:38:51,  2.15it/s]

buffer size = 2958, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1280/2000001 [13:17<254:46:47,  2.18it/s]

buffer size = 2960, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1281/2000001 [13:17<254:18:26,  2.18it/s]

buffer size = 2962, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1282/2000001 [13:18<250:59:16,  2.21it/s]

buffer size = 2964, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1283/2000001 [13:18<250:54:27,  2.21it/s]

buffer size = 2966, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1284/2000001 [13:19<250:35:02,  2.22it/s]

buffer size = 2968, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1285/2000001 [13:19<252:42:51,  2.20it/s]

buffer size = 2970, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1286/2000001 [13:20<258:05:02,  2.15it/s]

buffer size = 2972, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1287/2000001 [13:20<257:58:01,  2.15it/s]

buffer size = 2974, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1288/2000001 [13:21<256:46:37,  2.16it/s]

buffer size = 2976, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1289/2000001 [13:21<253:17:17,  2.19it/s]

buffer size = 2978, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1290/2000001 [13:22<254:50:44,  2.18it/s]

buffer size = 2980, epsilon = 0.09936
mean_reward :  0.0


  0%|          | 1291/2000001 [13:22<252:21:16,  2.20it/s]

buffer size = 2982, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1292/2000001 [13:23<254:46:52,  2.18it/s]

buffer size = 2984, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1293/2000001 [13:23<252:08:38,  2.20it/s]

buffer size = 2986, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1294/2000001 [13:24<273:38:27,  2.03it/s]

buffer size = 2988, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1295/2000001 [13:24<325:38:05,  1.70it/s]

buffer size = 2990, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1296/2000001 [13:25<358:11:12,  1.55it/s]

buffer size = 2992, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1297/2000001 [13:26<346:29:26,  1.60it/s]

buffer size = 2994, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1298/2000001 [13:26<315:13:46,  1.76it/s]

buffer size = 2996, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1299/2000001 [13:27<296:30:52,  1.87it/s]

buffer size = 2998, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1300/2000001 [13:27<280:35:27,  1.98it/s]

buffer size = 3000, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1301/2000001 [13:27<269:20:28,  2.06it/s]

buffer size = 3002, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1302/2000001 [13:28<263:21:08,  2.11it/s]

buffer size = 3004, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1303/2000001 [13:28<259:30:43,  2.14it/s]

buffer size = 3006, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1304/2000001 [13:29<258:55:12,  2.14it/s]

buffer size = 3008, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1305/2000001 [13:29<255:01:45,  2.18it/s]

buffer size = 3010, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1306/2000001 [13:30<254:12:15,  2.18it/s]

buffer size = 3012, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1307/2000001 [13:30<252:04:08,  2.20it/s]

buffer size = 3014, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1308/2000001 [13:31<250:30:58,  2.22it/s]

buffer size = 3016, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1309/2000001 [13:31<251:02:40,  2.21it/s]

buffer size = 3018, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1310/2000001 [13:32<249:43:16,  2.22it/s]

buffer size = 3020, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1311/2000001 [13:32<249:42:00,  2.22it/s]

buffer size = 3022, epsilon = 0.09935
mean_reward :  0.0


  0%|          | 1312/2000001 [13:32<249:38:22,  2.22it/s]

buffer size = 3024, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1313/2000001 [13:33<252:02:13,  2.20it/s]

buffer size = 3026, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1314/2000001 [13:33<251:02:48,  2.21it/s]

buffer size = 3028, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1315/2000001 [13:34<248:50:05,  2.23it/s]

buffer size = 3030, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1316/2000001 [13:34<252:14:06,  2.20it/s]

buffer size = 3032, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1317/2000001 [13:35<253:47:43,  2.19it/s]

buffer size = 3034, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1318/2000001 [13:35<254:35:11,  2.18it/s]

buffer size = 3036, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1319/2000001 [13:36<278:13:29,  2.00it/s]

buffer size = 3038, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1320/2000001 [13:37<337:31:15,  1.64it/s]

buffer size = 3040, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1321/2000001 [13:37<373:34:23,  1.49it/s]

buffer size = 3042, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1322/2000001 [13:38<348:49:35,  1.59it/s]

buffer size = 3044, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1323/2000001 [13:38<318:05:45,  1.75it/s]

buffer size = 3046, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1324/2000001 [13:39<296:02:09,  1.88it/s]

buffer size = 3048, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1325/2000001 [13:39<285:00:27,  1.95it/s]

buffer size = 3050, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1326/2000001 [13:40<275:27:41,  2.02it/s]

buffer size = 3052, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1327/2000001 [13:40<268:57:45,  2.06it/s]

buffer size = 3054, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1328/2000001 [13:41<264:43:43,  2.10it/s]

buffer size = 3056, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1329/2000001 [13:41<261:21:24,  2.12it/s]

buffer size = 3058, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1330/2000001 [13:42<259:28:30,  2.14it/s]

buffer size = 3060, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1331/2000001 [13:42<256:39:50,  2.16it/s]

buffer size = 3062, epsilon = 0.09934
mean_reward :  0.0


  0%|          | 1332/2000001 [13:43<258:03:45,  2.15it/s]

buffer size = 3064, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1333/2000001 [13:43<256:29:47,  2.16it/s]

buffer size = 3066, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1334/2000001 [13:43<257:08:11,  2.16it/s]

buffer size = 3068, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1335/2000001 [13:44<257:21:07,  2.16it/s]

buffer size = 3070, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1336/2000001 [13:44<258:28:04,  2.15it/s]

buffer size = 3072, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1337/2000001 [13:45<257:50:35,  2.15it/s]

buffer size = 3074, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1338/2000001 [13:45<264:17:50,  2.10it/s]

buffer size = 3076, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1339/2000001 [13:46<262:44:27,  2.11it/s]

buffer size = 3078, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1340/2000001 [13:46<261:41:07,  2.12it/s]

buffer size = 3080, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1341/2000001 [13:47<260:57:36,  2.13it/s]

buffer size = 3082, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1342/2000001 [13:47<258:12:47,  2.15it/s]

buffer size = 3084, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1343/2000001 [13:48<261:29:51,  2.12it/s]

buffer size = 3086, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1344/2000001 [13:48<313:29:20,  1.77it/s]

buffer size = 3088, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1345/2000001 [13:49<361:16:04,  1.54it/s]

buffer size = 3090, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1346/2000001 [13:50<367:11:08,  1.51it/s]

buffer size = 3092, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1347/2000001 [13:50<333:39:55,  1.66it/s]

buffer size = 3094, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1348/2000001 [13:51<309:55:00,  1.79it/s]

buffer size = 3096, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1349/2000001 [13:51<294:21:01,  1.89it/s]

buffer size = 3098, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1350/2000001 [13:52<281:28:42,  1.97it/s]

buffer size = 3100, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1351/2000001 [13:52<271:52:53,  2.04it/s]

buffer size = 3102, epsilon = 0.09933
mean_reward :  0.0


  0%|          | 1352/2000001 [13:53<266:53:26,  2.08it/s]

buffer size = 3104, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1353/2000001 [13:53<261:15:01,  2.13it/s]

buffer size = 3106, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1354/2000001 [13:54<260:07:02,  2.13it/s]

buffer size = 3108, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1355/2000001 [13:54<259:22:43,  2.14it/s]

buffer size = 3110, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1356/2000001 [13:55<261:24:05,  2.12it/s]

buffer size = 3112, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1357/2000001 [13:55<259:50:43,  2.14it/s]

buffer size = 3114, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1358/2000001 [13:56<254:47:02,  2.18it/s]

buffer size = 3116, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1359/2000001 [13:56<255:49:50,  2.17it/s]

buffer size = 3118, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1360/2000001 [13:56<255:22:10,  2.17it/s]

buffer size = 3120, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1361/2000001 [13:57<256:49:25,  2.16it/s]

buffer size = 3122, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1362/2000001 [13:57<255:24:59,  2.17it/s]

buffer size = 3124, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1363/2000001 [13:58<255:01:12,  2.18it/s]

buffer size = 3126, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1364/2000001 [13:58<253:18:47,  2.19it/s]

buffer size = 3128, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1365/2000001 [13:59<255:25:42,  2.17it/s]

buffer size = 3130, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1366/2000001 [13:59<253:36:41,  2.19it/s]

buffer size = 3132, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1367/2000001 [14:00<251:42:06,  2.21it/s]

buffer size = 3134, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1368/2000001 [14:00<280:47:59,  1.98it/s]

buffer size = 3136, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1369/2000001 [14:01<330:17:11,  1.68it/s]

buffer size = 3138, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1370/2000001 [14:02<362:26:43,  1.53it/s]

buffer size = 3140, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1371/2000001 [14:02<344:50:04,  1.61it/s]

buffer size = 3142, epsilon = 0.09932
mean_reward :  0.0


  0%|          | 1372/2000001 [14:03<318:03:26,  1.75it/s]

buffer size = 3144, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1373/2000001 [14:03<298:30:38,  1.86it/s]

buffer size = 3146, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1374/2000001 [14:04<282:02:05,  1.97it/s]

buffer size = 3148, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1375/2000001 [14:04<275:54:18,  2.01it/s]

buffer size = 3150, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1376/2000001 [14:05<269:53:45,  2.06it/s]

buffer size = 3152, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1377/2000001 [14:05<267:22:36,  2.08it/s]

buffer size = 3154, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1378/2000001 [14:06<263:13:13,  2.11it/s]

buffer size = 3156, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1379/2000001 [14:06<260:45:43,  2.13it/s]

buffer size = 3158, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1380/2000001 [14:07<257:54:46,  2.15it/s]

buffer size = 3160, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1381/2000001 [14:07<258:15:16,  2.15it/s]

buffer size = 3162, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1382/2000001 [14:07<258:34:02,  2.15it/s]

buffer size = 3164, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1383/2000001 [14:08<254:45:28,  2.18it/s]

buffer size = 3166, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1384/2000001 [14:08<254:44:40,  2.18it/s]

buffer size = 3168, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1385/2000001 [14:09<253:34:12,  2.19it/s]

buffer size = 3170, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1386/2000001 [14:09<255:52:37,  2.17it/s]

buffer size = 3172, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1387/2000001 [14:10<253:35:19,  2.19it/s]

buffer size = 3174, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1388/2000001 [14:10<254:38:37,  2.18it/s]

buffer size = 3176, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1389/2000001 [14:11<252:28:45,  2.20it/s]

buffer size = 3178, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1390/2000001 [14:11<252:41:45,  2.20it/s]

buffer size = 3180, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1391/2000001 [14:12<254:25:57,  2.18it/s]

buffer size = 3182, epsilon = 0.09931
mean_reward :  0.0


  0%|          | 1392/2000001 [14:12<255:03:57,  2.18it/s]

buffer size = 3184, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1393/2000001 [14:13<306:55:41,  1.81it/s]

buffer size = 3186, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1394/2000001 [14:14<354:06:08,  1.57it/s]

buffer size = 3188, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1395/2000001 [14:14<370:10:58,  1.50it/s]

buffer size = 3190, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1396/2000001 [14:15<332:04:39,  1.67it/s]

buffer size = 3192, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1397/2000001 [14:15<316:56:50,  1.75it/s]

buffer size = 3194, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1398/2000001 [14:16<298:44:20,  1.86it/s]

buffer size = 3196, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1399/2000001 [14:16<283:53:32,  1.96it/s]

buffer size = 3198, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1400/2000001 [14:17<275:41:22,  2.01it/s]

buffer size = 3200, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1401/2000001 [14:17<270:09:05,  2.06it/s]

buffer size = 3202, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1402/2000001 [14:18<264:16:20,  2.10it/s]

buffer size = 3204, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1403/2000001 [14:18<259:06:18,  2.14it/s]

buffer size = 3206, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1404/2000001 [14:18<257:57:02,  2.15it/s]

buffer size = 3208, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1405/2000001 [14:19<257:39:41,  2.15it/s]

buffer size = 3210, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1406/2000001 [14:19<261:45:40,  2.12it/s]

buffer size = 3212, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1407/2000001 [14:20<258:56:58,  2.14it/s]

buffer size = 3214, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1408/2000001 [14:20<257:36:32,  2.16it/s]

buffer size = 3216, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1409/2000001 [14:21<260:04:12,  2.13it/s]

buffer size = 3218, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1410/2000001 [14:21<261:16:16,  2.12it/s]

buffer size = 3220, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1411/2000001 [14:22<260:20:46,  2.13it/s]

buffer size = 3222, epsilon = 0.09930
mean_reward :  0.0


  0%|          | 1412/2000001 [14:22<257:36:53,  2.16it/s]

buffer size = 3224, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1413/2000001 [14:23<257:18:28,  2.16it/s]

buffer size = 3226, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1414/2000001 [14:23<256:33:52,  2.16it/s]

buffer size = 3228, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1415/2000001 [14:24<256:00:43,  2.17it/s]

buffer size = 3230, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1416/2000001 [14:24<255:59:21,  2.17it/s]

buffer size = 3232, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1417/2000001 [14:25<293:13:13,  1.89it/s]

buffer size = 3234, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1418/2000001 [14:26<334:46:26,  1.66it/s]

buffer size = 3236, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1419/2000001 [14:26<366:36:42,  1.51it/s]

buffer size = 3238, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1420/2000001 [14:27<340:05:38,  1.63it/s]

buffer size = 3240, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1421/2000001 [14:27<315:02:09,  1.76it/s]

buffer size = 3242, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1422/2000001 [14:28<297:32:10,  1.87it/s]

buffer size = 3244, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1423/2000001 [14:28<284:01:19,  1.95it/s]

buffer size = 3246, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1424/2000001 [14:29<278:33:48,  1.99it/s]

buffer size = 3248, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1425/2000001 [14:29<274:18:30,  2.02it/s]

buffer size = 3250, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1426/2000001 [14:30<267:51:22,  2.07it/s]

buffer size = 3252, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1427/2000001 [14:30<266:51:33,  2.08it/s]

buffer size = 3254, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1428/2000001 [14:31<264:20:38,  2.10it/s]

buffer size = 3256, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1429/2000001 [14:31<262:46:12,  2.11it/s]

buffer size = 3258, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1430/2000001 [14:31<258:07:41,  2.15it/s]

buffer size = 3260, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1431/2000001 [14:32<261:10:05,  2.13it/s]

buffer size = 3262, epsilon = 0.09929
mean_reward :  0.0


  0%|          | 1432/2000001 [14:32<256:40:16,  2.16it/s]

buffer size = 3264, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1433/2000001 [14:33<257:11:58,  2.16it/s]

buffer size = 3266, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1434/2000001 [14:33<257:20:51,  2.16it/s]

buffer size = 3268, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1435/2000001 [14:34<254:59:15,  2.18it/s]

buffer size = 3270, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1436/2000001 [14:34<259:03:09,  2.14it/s]

buffer size = 3272, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1437/2000001 [14:35<258:41:41,  2.15it/s]

buffer size = 3274, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1438/2000001 [14:35<257:49:26,  2.15it/s]

buffer size = 3276, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1439/2000001 [14:36<256:55:09,  2.16it/s]

buffer size = 3278, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1440/2000001 [14:36<261:13:23,  2.13it/s]

buffer size = 3280, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1441/2000001 [14:37<276:12:05,  2.01it/s]

buffer size = 3282, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1442/2000001 [14:38<337:41:24,  1.64it/s]

buffer size = 3284, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1443/2000001 [14:38<371:09:06,  1.50it/s]

buffer size = 3286, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1444/2000001 [14:39<346:30:09,  1.60it/s]

buffer size = 3288, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1445/2000001 [14:39<318:56:57,  1.74it/s]

buffer size = 3290, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1446/2000001 [14:40<297:43:32,  1.86it/s]

buffer size = 3292, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1447/2000001 [14:40<285:56:40,  1.94it/s]

buffer size = 3294, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1448/2000001 [14:41<276:27:02,  2.01it/s]

buffer size = 3296, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1449/2000001 [14:41<269:43:06,  2.06it/s]

buffer size = 3298, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1450/2000001 [14:42<266:42:54,  2.08it/s]

buffer size = 3300, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1451/2000001 [14:42<262:30:16,  2.11it/s]

buffer size = 3302, epsilon = 0.09928
mean_reward :  0.0


  0%|          | 1452/2000001 [14:43<260:49:52,  2.13it/s]

buffer size = 3304, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1453/2000001 [14:43<257:04:24,  2.16it/s]

buffer size = 3306, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1454/2000001 [14:44<262:47:55,  2.11it/s]

buffer size = 3308, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1455/2000001 [14:44<259:38:55,  2.14it/s]

buffer size = 3310, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1456/2000001 [14:44<259:21:02,  2.14it/s]

buffer size = 3312, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1457/2000001 [14:45<256:45:04,  2.16it/s]

buffer size = 3314, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1458/2000001 [14:45<263:19:57,  2.11it/s]

buffer size = 3316, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1459/2000001 [14:46<259:52:27,  2.14it/s]

buffer size = 3318, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1460/2000001 [14:46<256:31:31,  2.16it/s]

buffer size = 3320, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1461/2000001 [14:47<256:40:36,  2.16it/s]

buffer size = 3322, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1462/2000001 [14:47<257:05:50,  2.16it/s]

buffer size = 3324, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1463/2000001 [14:48<257:40:22,  2.15it/s]

buffer size = 3326, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1464/2000001 [14:48<256:02:59,  2.17it/s]

buffer size = 3328, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1465/2000001 [14:49<259:07:16,  2.14it/s]

buffer size = 3330, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1466/2000001 [14:49<318:12:41,  1.74it/s]

buffer size = 3332, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1467/2000001 [14:50<353:39:58,  1.57it/s]

buffer size = 3334, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1468/2000001 [14:51<361:52:01,  1.53it/s]

buffer size = 3336, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1469/2000001 [14:51<331:11:34,  1.68it/s]

buffer size = 3338, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1470/2000001 [14:52<307:54:27,  1.80it/s]

buffer size = 3340, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1471/2000001 [14:52<289:56:05,  1.91it/s]

buffer size = 3342, epsilon = 0.09927
mean_reward :  0.0


  0%|          | 1472/2000001 [14:53<279:57:36,  1.98it/s]

buffer size = 3344, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1473/2000001 [14:53<271:17:30,  2.05it/s]

buffer size = 3346, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1474/2000001 [14:54<266:36:37,  2.08it/s]

buffer size = 3348, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1475/2000001 [14:54<262:57:34,  2.11it/s]

buffer size = 3350, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1476/2000001 [14:55<261:53:00,  2.12it/s]

buffer size = 3352, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1477/2000001 [14:55<261:03:55,  2.13it/s]

buffer size = 3354, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1478/2000001 [14:55<256:52:27,  2.16it/s]

buffer size = 3356, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1479/2000001 [14:56<257:12:51,  2.16it/s]

buffer size = 3358, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1480/2000001 [14:56<255:43:25,  2.17it/s]

buffer size = 3360, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1481/2000001 [14:57<254:31:59,  2.18it/s]

buffer size = 3362, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1482/2000001 [14:57<253:44:00,  2.19it/s]

buffer size = 3364, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1483/2000001 [14:58<254:01:12,  2.19it/s]

buffer size = 3366, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1484/2000001 [14:58<256:12:07,  2.17it/s]

buffer size = 3368, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1485/2000001 [14:59<256:59:29,  2.16it/s]

buffer size = 3370, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1486/2000001 [14:59<256:27:51,  2.16it/s]

buffer size = 3372, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1487/2000001 [15:00<255:48:36,  2.17it/s]

buffer size = 3374, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1488/2000001 [15:00<256:24:15,  2.17it/s]

buffer size = 3376, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1489/2000001 [15:01<253:57:37,  2.19it/s]

buffer size = 3378, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1490/2000001 [15:01<286:35:26,  1.94it/s]

buffer size = 3380, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1491/2000001 [15:02<330:28:25,  1.68it/s]

buffer size = 3382, epsilon = 0.09926
mean_reward :  0.0


  0%|          | 1492/2000001 [15:03<361:03:22,  1.54it/s]

buffer size = 3384, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1493/2000001 [15:03<344:51:15,  1.61it/s]

buffer size = 3386, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1494/2000001 [15:04<318:32:52,  1.74it/s]

buffer size = 3388, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1495/2000001 [15:04<300:28:30,  1.85it/s]

buffer size = 3390, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1496/2000001 [15:05<287:23:35,  1.93it/s]

buffer size = 3392, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1497/2000001 [15:05<278:37:36,  1.99it/s]

buffer size = 3394, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1498/2000001 [15:06<272:09:08,  2.04it/s]

buffer size = 3396, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1499/2000001 [15:06<266:25:34,  2.08it/s]

buffer size = 3398, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1500/2000001 [15:07<266:45:06,  2.08it/s]

buffer size = 3400, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1501/2000001 [15:07<263:36:55,  2.11it/s]

buffer size = 3402, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1502/2000001 [15:08<263:18:10,  2.11it/s]

buffer size = 3404, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1503/2000001 [15:08<259:21:15,  2.14it/s]

buffer size = 3406, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1504/2000001 [15:08<257:17:44,  2.16it/s]

buffer size = 3408, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1505/2000001 [15:09<255:46:52,  2.17it/s]

buffer size = 3410, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1506/2000001 [15:09<257:02:35,  2.16it/s]

buffer size = 3412, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1507/2000001 [15:10<256:13:57,  2.17it/s]

buffer size = 3414, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1508/2000001 [15:10<255:36:00,  2.17it/s]

buffer size = 3416, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1509/2000001 [15:11<259:56:56,  2.14it/s]

buffer size = 3418, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1510/2000001 [15:11<257:17:48,  2.16it/s]

buffer size = 3420, epsilon = 0.09925
mean_reward :  0.0


  0%|          | 1511/2000001 [15:12<257:56:17,  2.15it/s]

buffer size = 3422, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1512/2000001 [15:12<256:07:42,  2.17it/s]

buffer size = 3424, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1513/2000001 [15:13<257:18:02,  2.16it/s]

buffer size = 3426, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1514/2000001 [15:13<256:34:05,  2.16it/s]

buffer size = 3428, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1515/2000001 [15:14<308:14:14,  1.80it/s]

buffer size = 3430, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1516/2000001 [15:15<349:30:30,  1.59it/s]

buffer size = 3432, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1517/2000001 [15:15<368:25:08,  1.51it/s]

buffer size = 3434, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1518/2000001 [15:16<336:35:20,  1.65it/s]

buffer size = 3436, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1519/2000001 [15:16<310:09:21,  1.79it/s]

buffer size = 3438, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1520/2000001 [15:17<294:54:05,  1.88it/s]

buffer size = 3440, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1521/2000001 [15:17<282:41:36,  1.96it/s]

buffer size = 3442, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1522/2000001 [15:18<277:19:04,  2.00it/s]

buffer size = 3444, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1523/2000001 [15:18<268:13:05,  2.07it/s]

buffer size = 3446, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1524/2000001 [15:19<262:25:05,  2.12it/s]

buffer size = 3448, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1525/2000001 [15:19<258:50:27,  2.14it/s]

buffer size = 3450, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1526/2000001 [15:19<257:27:41,  2.16it/s]

buffer size = 3452, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1527/2000001 [15:20<255:38:14,  2.17it/s]

buffer size = 3454, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1528/2000001 [15:20<252:26:57,  2.20it/s]

buffer size = 3456, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1529/2000001 [15:21<255:05:29,  2.18it/s]

buffer size = 3458, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1530/2000001 [15:21<256:23:38,  2.17it/s]

buffer size = 3460, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1531/2000001 [15:22<258:46:52,  2.15it/s]

buffer size = 3462, epsilon = 0.09924
mean_reward :  0.0


  0%|          | 1532/2000001 [15:22<257:55:00,  2.15it/s]

buffer size = 3464, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1533/2000001 [15:23<256:29:56,  2.16it/s]

buffer size = 3466, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1534/2000001 [15:23<257:07:12,  2.16it/s]

buffer size = 3468, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1535/2000001 [15:24<255:42:11,  2.17it/s]

buffer size = 3470, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1536/2000001 [15:24<257:59:11,  2.15it/s]

buffer size = 3472, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1537/2000001 [15:25<258:02:21,  2.15it/s]

buffer size = 3474, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1538/2000001 [15:25<259:14:24,  2.14it/s]

buffer size = 3476, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1539/2000001 [15:26<296:54:54,  1.87it/s]

buffer size = 3478, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1540/2000001 [15:27<343:01:28,  1.62it/s]

buffer size = 3480, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1541/2000001 [15:27<369:13:55,  1.50it/s]

buffer size = 3482, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1542/2000001 [15:28<350:27:33,  1.58it/s]

buffer size = 3484, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1543/2000001 [15:28<324:23:46,  1.71it/s]

buffer size = 3486, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1544/2000001 [15:29<304:00:40,  1.83it/s]

buffer size = 3488, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1545/2000001 [15:29<290:58:02,  1.91it/s]

buffer size = 3490, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1546/2000001 [15:30<280:11:56,  1.98it/s]

buffer size = 3492, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1547/2000001 [15:30<274:48:16,  2.02it/s]

buffer size = 3494, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1548/2000001 [15:31<267:31:00,  2.08it/s]

buffer size = 3496, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1549/2000001 [15:31<266:27:36,  2.08it/s]

buffer size = 3498, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1550/2000001 [15:32<264:01:49,  2.10it/s]

buffer size = 3500, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1551/2000001 [15:32<261:24:49,  2.12it/s]

buffer size = 3502, epsilon = 0.09923
mean_reward :  0.0


  0%|          | 1552/2000001 [15:33<263:23:27,  2.11it/s]

buffer size = 3504, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1553/2000001 [15:33<260:34:36,  2.13it/s]

buffer size = 3506, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1554/2000001 [15:33<259:43:17,  2.14it/s]

buffer size = 3508, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1555/2000001 [15:34<256:46:56,  2.16it/s]

buffer size = 3510, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1556/2000001 [15:34<260:05:32,  2.13it/s]

buffer size = 3512, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1557/2000001 [15:35<259:06:44,  2.14it/s]

buffer size = 3514, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1558/2000001 [15:35<258:05:22,  2.15it/s]

buffer size = 3516, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1559/2000001 [15:36<257:14:26,  2.16it/s]

buffer size = 3518, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1560/2000001 [15:36<258:14:13,  2.15it/s]

buffer size = 3520, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1561/2000001 [15:37<258:30:44,  2.15it/s]

buffer size = 3522, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1562/2000001 [15:37<258:33:59,  2.15it/s]

buffer size = 3524, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1563/2000001 [15:38<266:02:07,  2.09it/s]

buffer size = 3526, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1564/2000001 [15:38<319:26:54,  1.74it/s]

buffer size = 3528, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1565/2000001 [15:39<358:10:08,  1.55it/s]

buffer size = 3530, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1566/2000001 [15:40<364:48:16,  1.52it/s]

buffer size = 3532, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1567/2000001 [15:40<332:08:12,  1.67it/s]

buffer size = 3534, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1568/2000001 [15:41<308:40:03,  1.80it/s]

buffer size = 3536, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1569/2000001 [15:41<290:48:57,  1.91it/s]

buffer size = 3538, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1570/2000001 [15:42<282:48:04,  1.96it/s]

buffer size = 3540, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1571/2000001 [15:42<275:39:47,  2.01it/s]

buffer size = 3542, epsilon = 0.09922
mean_reward :  0.0


  0%|          | 1572/2000001 [15:43<271:40:26,  2.04it/s]

buffer size = 3544, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1573/2000001 [15:43<267:22:24,  2.08it/s]

buffer size = 3546, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1574/2000001 [15:44<264:42:22,  2.10it/s]

buffer size = 3548, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1575/2000001 [15:44<261:35:50,  2.12it/s]

buffer size = 3550, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1576/2000001 [15:45<260:18:09,  2.13it/s]

buffer size = 3552, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1577/2000001 [15:45<259:13:27,  2.14it/s]

buffer size = 3554, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1578/2000001 [15:46<261:08:36,  2.13it/s]

buffer size = 3556, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1579/2000001 [15:46<259:45:50,  2.14it/s]

buffer size = 3558, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1580/2000001 [15:46<257:19:33,  2.16it/s]

buffer size = 3560, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1581/2000001 [15:47<257:51:47,  2.15it/s]

buffer size = 3562, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1582/2000001 [15:47<255:56:26,  2.17it/s]

buffer size = 3564, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1583/2000001 [15:48<256:25:28,  2.16it/s]

buffer size = 3566, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1584/2000001 [15:48<255:01:18,  2.18it/s]

buffer size = 3568, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1585/2000001 [15:49<257:55:03,  2.15it/s]

buffer size = 3570, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1586/2000001 [15:49<258:18:14,  2.15it/s]

buffer size = 3572, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1587/2000001 [15:50<260:56:29,  2.13it/s]

buffer size = 3574, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1588/2000001 [15:50<304:25:40,  1.82it/s]

buffer size = 3576, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1589/2000001 [15:51<347:37:27,  1.60it/s]

buffer size = 3578, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1590/2000001 [15:52<377:37:51,  1.47it/s]

buffer size = 3580, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1591/2000001 [15:53<341:53:38,  1.62it/s]

buffer size = 3582, epsilon = 0.09921
mean_reward :  0.0


  0%|          | 1592/2000001 [15:53<318:59:18,  1.74it/s]

buffer size = 3584, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1593/2000001 [15:53<298:49:07,  1.86it/s]

buffer size = 3586, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1594/2000001 [15:54<287:40:17,  1.93it/s]

buffer size = 3588, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1595/2000001 [15:54<279:21:10,  1.99it/s]

buffer size = 3590, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1596/2000001 [15:55<272:34:26,  2.04it/s]

buffer size = 3592, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1597/2000001 [15:55<269:24:26,  2.06it/s]

buffer size = 3594, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1598/2000001 [15:56<264:19:01,  2.10it/s]

buffer size = 3596, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1599/2000001 [15:56<263:21:10,  2.11it/s]

buffer size = 3598, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1600/2000001 [15:57<261:59:43,  2.12it/s]

buffer size = 3600, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1601/2000001 [15:57<260:52:11,  2.13it/s]

buffer size = 3602, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1602/2000001 [15:58<259:32:33,  2.14it/s]

buffer size = 3604, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1603/2000001 [15:58<259:52:55,  2.14it/s]

buffer size = 3606, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1604/2000001 [15:59<260:42:47,  2.13it/s]

buffer size = 3608, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1605/2000001 [15:59<260:21:58,  2.13it/s]

buffer size = 3610, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1606/2000001 [16:00<260:20:20,  2.13it/s]

buffer size = 3612, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1607/2000001 [16:00<259:12:55,  2.14it/s]

buffer size = 3614, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1608/2000001 [16:00<259:00:06,  2.14it/s]

buffer size = 3616, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1609/2000001 [16:01<257:20:47,  2.16it/s]

buffer size = 3618, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1610/2000001 [16:01<257:14:45,  2.16it/s]

buffer size = 3620, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1611/2000001 [16:02<256:35:52,  2.16it/s]

buffer size = 3622, epsilon = 0.09920
mean_reward :  0.0


  0%|          | 1612/2000001 [16:03<301:56:59,  1.84it/s]

buffer size = 3624, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1613/2000001 [16:03<353:18:38,  1.57it/s]

buffer size = 3626, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1614/2000001 [16:04<374:21:38,  1.48it/s]

buffer size = 3628, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1615/2000001 [16:05<344:56:13,  1.61it/s]

buffer size = 3630, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1616/2000001 [16:05<315:46:26,  1.76it/s]

buffer size = 3632, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1617/2000001 [16:06<298:18:21,  1.86it/s]

buffer size = 3634, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1618/2000001 [16:06<285:27:22,  1.94it/s]

buffer size = 3636, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1619/2000001 [16:07<280:42:55,  1.98it/s]

buffer size = 3638, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1620/2000001 [16:07<275:19:27,  2.02it/s]

buffer size = 3640, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1621/2000001 [16:08<271:34:30,  2.04it/s]

buffer size = 3642, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1622/2000001 [16:08<266:48:38,  2.08it/s]

buffer size = 3644, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1623/2000001 [16:08<264:00:48,  2.10it/s]

buffer size = 3646, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1624/2000001 [16:09<263:59:10,  2.10it/s]

buffer size = 3648, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1625/2000001 [16:09<264:51:23,  2.10it/s]

buffer size = 3650, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1626/2000001 [16:10<264:54:37,  2.10it/s]

buffer size = 3652, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1627/2000001 [16:10<261:26:26,  2.12it/s]

buffer size = 3654, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1628/2000001 [16:11<262:27:01,  2.12it/s]

buffer size = 3656, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1629/2000001 [16:11<261:20:21,  2.12it/s]

buffer size = 3658, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1630/2000001 [16:12<263:52:24,  2.10it/s]

buffer size = 3660, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1631/2000001 [16:12<261:54:20,  2.12it/s]

buffer size = 3662, epsilon = 0.09919
mean_reward :  0.0


  0%|          | 1632/2000001 [16:13<263:42:18,  2.11it/s]

buffer size = 3664, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1633/2000001 [16:13<262:17:33,  2.12it/s]

buffer size = 3666, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1634/2000001 [16:14<262:26:56,  2.12it/s]

buffer size = 3668, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1635/2000001 [16:14<262:02:11,  2.12it/s]

buffer size = 3670, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1636/2000001 [16:15<311:48:34,  1.78it/s]

buffer size = 3672, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1637/2000001 [16:16<346:43:56,  1.60it/s]

buffer size = 3674, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1638/2000001 [16:16<373:52:31,  1.48it/s]

buffer size = 3676, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1639/2000001 [16:17<340:16:32,  1.63it/s]

buffer size = 3678, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1640/2000001 [16:17<315:59:14,  1.76it/s]

buffer size = 3680, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1641/2000001 [16:18<296:25:40,  1.87it/s]

buffer size = 3682, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1642/2000001 [16:18<286:56:34,  1.93it/s]

buffer size = 3684, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1643/2000001 [16:19<276:45:52,  2.01it/s]

buffer size = 3686, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1644/2000001 [16:19<272:28:39,  2.04it/s]

buffer size = 3688, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1645/2000001 [16:20<274:11:12,  2.02it/s]

buffer size = 3690, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1646/2000001 [16:20<268:33:14,  2.07it/s]

buffer size = 3692, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1647/2000001 [16:21<267:49:01,  2.07it/s]

buffer size = 3694, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1648/2000001 [16:21<264:43:03,  2.10it/s]

buffer size = 3696, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1649/2000001 [16:22<262:24:17,  2.12it/s]

buffer size = 3698, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1650/2000001 [16:22<262:17:34,  2.12it/s]

buffer size = 3700, epsilon = 0.09918
mean_reward :  0.0


  0%|          | 1651/2000001 [16:23<258:48:06,  2.14it/s]

buffer size = 3702, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1652/2000001 [16:23<260:15:35,  2.13it/s]

buffer size = 3704, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1653/2000001 [16:23<260:40:29,  2.13it/s]

buffer size = 3706, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1654/2000001 [16:24<259:41:22,  2.14it/s]

buffer size = 3708, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1655/2000001 [16:24<262:32:34,  2.11it/s]

buffer size = 3710, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1656/2000001 [16:25<260:02:51,  2.13it/s]

buffer size = 3712, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1657/2000001 [16:25<260:02:49,  2.13it/s]

buffer size = 3714, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1658/2000001 [16:26<259:50:24,  2.14it/s]

buffer size = 3716, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1659/2000001 [16:26<259:45:45,  2.14it/s]

buffer size = 3718, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1660/2000001 [16:27<297:30:06,  1.87it/s]

buffer size = 3720, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1661/2000001 [16:28<345:05:52,  1.61it/s]

buffer size = 3722, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1662/2000001 [16:29<376:18:09,  1.48it/s]

buffer size = 3724, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1663/2000001 [16:29<347:42:06,  1.60it/s]

buffer size = 3726, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1664/2000001 [16:30<322:01:59,  1.72it/s]

buffer size = 3728, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1665/2000001 [16:30<304:51:54,  1.82it/s]

buffer size = 3730, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1666/2000001 [16:31<293:24:21,  1.89it/s]

buffer size = 3732, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1667/2000001 [16:31<283:48:07,  1.96it/s]

buffer size = 3734, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1668/2000001 [16:31<276:39:56,  2.01it/s]

buffer size = 3736, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1669/2000001 [16:32<271:14:31,  2.05it/s]

buffer size = 3738, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1670/2000001 [16:32<269:44:23,  2.06it/s]

buffer size = 3740, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1671/2000001 [16:33<267:31:10,  2.07it/s]

buffer size = 3742, epsilon = 0.09917
mean_reward :  0.0


  0%|          | 1672/2000001 [16:33<264:46:27,  2.10it/s]

buffer size = 3744, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1673/2000001 [16:34<263:50:48,  2.10it/s]

buffer size = 3746, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1674/2000001 [16:34<263:06:17,  2.11it/s]

buffer size = 3748, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1675/2000001 [16:35<265:02:46,  2.09it/s]

buffer size = 3750, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1676/2000001 [16:35<260:59:40,  2.13it/s]

buffer size = 3752, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1677/2000001 [16:36<262:05:51,  2.12it/s]

buffer size = 3754, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1678/2000001 [16:36<261:36:10,  2.12it/s]

buffer size = 3756, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1679/2000001 [16:37<265:01:00,  2.09it/s]

buffer size = 3758, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1680/2000001 [16:37<263:28:35,  2.11it/s]

buffer size = 3760, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1681/2000001 [16:38<262:36:52,  2.11it/s]

buffer size = 3762, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1682/2000001 [16:38<261:00:30,  2.13it/s]

buffer size = 3764, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1683/2000001 [16:39<260:17:18,  2.13it/s]

buffer size = 3766, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1684/2000001 [16:39<295:58:12,  1.88it/s]

buffer size = 3768, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1685/2000001 [16:40<345:38:01,  1.61it/s]

buffer size = 3770, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1686/2000001 [16:41<376:45:13,  1.47it/s]

buffer size = 3772, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1687/2000001 [16:41<341:15:03,  1.63it/s]

buffer size = 3774, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1688/2000001 [16:42<317:47:34,  1.75it/s]

buffer size = 3776, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1689/2000001 [16:42<300:06:43,  1.85it/s]

buffer size = 3778, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1690/2000001 [16:43<288:31:12,  1.92it/s]

buffer size = 3780, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1691/2000001 [16:43<281:44:06,  1.97it/s]

buffer size = 3782, epsilon = 0.09916
mean_reward :  0.0


  0%|          | 1692/2000001 [16:44<276:20:19,  2.01it/s]

buffer size = 3784, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1693/2000001 [16:44<271:30:58,  2.04it/s]

buffer size = 3786, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1694/2000001 [16:45<266:46:07,  2.08it/s]

buffer size = 3788, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1695/2000001 [16:45<268:19:05,  2.07it/s]

buffer size = 3790, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1696/2000001 [16:46<270:25:10,  2.05it/s]

buffer size = 3792, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1697/2000001 [16:46<266:58:23,  2.08it/s]

buffer size = 3794, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1698/2000001 [16:47<263:47:36,  2.10it/s]

buffer size = 3796, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1699/2000001 [16:47<265:39:55,  2.09it/s]

buffer size = 3798, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1700/2000001 [16:47<262:25:08,  2.12it/s]

buffer size = 3800, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1701/2000001 [16:48<262:41:06,  2.11it/s]

buffer size = 3802, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1702/2000001 [16:48<262:03:50,  2.12it/s]

buffer size = 3804, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1703/2000001 [16:49<261:43:34,  2.12it/s]

buffer size = 3806, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1704/2000001 [16:49<265:07:08,  2.09it/s]

buffer size = 3808, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1705/2000001 [16:50<261:48:56,  2.12it/s]

buffer size = 3810, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1706/2000001 [16:50<262:32:03,  2.11it/s]

buffer size = 3812, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1707/2000001 [16:51<260:20:50,  2.13it/s]

buffer size = 3814, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1708/2000001 [16:52<311:29:24,  1.78it/s]

buffer size = 3816, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1709/2000001 [16:52<354:47:13,  1.56it/s]

buffer size = 3818, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1710/2000001 [16:53<373:38:41,  1.49it/s]

buffer size = 3820, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1711/2000001 [16:54<339:07:42,  1.64it/s]

buffer size = 3822, epsilon = 0.09915
mean_reward :  0.0


  0%|          | 1712/2000001 [16:54<320:07:33,  1.73it/s]

buffer size = 3824, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1713/2000001 [16:55<303:35:58,  1.83it/s]

buffer size = 3826, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1714/2000001 [16:55<290:53:43,  1.91it/s]

buffer size = 3828, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1715/2000001 [16:56<282:14:13,  1.97it/s]

buffer size = 3830, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1716/2000001 [16:56<275:39:33,  2.01it/s]

buffer size = 3832, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1717/2000001 [16:56<271:18:23,  2.05it/s]

buffer size = 3834, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1718/2000001 [16:57<268:38:36,  2.07it/s]

buffer size = 3836, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1719/2000001 [16:57<268:36:31,  2.07it/s]

buffer size = 3838, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1720/2000001 [16:58<265:17:22,  2.09it/s]

buffer size = 3840, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1721/2000001 [16:58<261:30:45,  2.12it/s]

buffer size = 3842, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1722/2000001 [16:59<263:40:32,  2.11it/s]

buffer size = 3844, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1723/2000001 [16:59<260:57:43,  2.13it/s]

buffer size = 3846, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1724/2000001 [17:00<263:01:59,  2.11it/s]

buffer size = 3848, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1725/2000001 [17:00<261:11:17,  2.13it/s]

buffer size = 3850, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1726/2000001 [17:01<260:46:53,  2.13it/s]

buffer size = 3852, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1727/2000001 [17:01<259:34:27,  2.14it/s]

buffer size = 3854, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1728/2000001 [17:02<260:28:47,  2.13it/s]

buffer size = 3856, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1729/2000001 [17:02<260:55:41,  2.13it/s]

buffer size = 3858, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1730/2000001 [17:03<261:00:56,  2.13it/s]

buffer size = 3860, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1731/2000001 [17:03<260:33:01,  2.13it/s]

buffer size = 3862, epsilon = 0.09914
mean_reward :  0.0


  0%|          | 1732/2000001 [17:04<310:19:04,  1.79it/s]

buffer size = 3864, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1733/2000001 [17:05<358:40:56,  1.55it/s]

buffer size = 3866, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1734/2000001 [17:05<377:46:38,  1.47it/s]

buffer size = 3868, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1735/2000001 [17:06<345:31:10,  1.61it/s]

buffer size = 3870, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1736/2000001 [17:06<318:54:53,  1.74it/s]

buffer size = 3872, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1737/2000001 [17:07<302:49:57,  1.83it/s]

buffer size = 3874, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1738/2000001 [17:07<289:53:23,  1.91it/s]

buffer size = 3876, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1739/2000001 [17:08<282:46:04,  1.96it/s]

buffer size = 3878, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1740/2000001 [17:08<274:31:51,  2.02it/s]

buffer size = 3880, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1741/2000001 [17:09<270:29:24,  2.05it/s]

buffer size = 3882, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1742/2000001 [17:09<270:51:08,  2.05it/s]

buffer size = 3884, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1743/2000001 [17:10<268:13:27,  2.07it/s]

buffer size = 3886, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1744/2000001 [17:10<268:45:58,  2.07it/s]

buffer size = 3888, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1745/2000001 [17:11<265:08:23,  2.09it/s]

buffer size = 3890, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1746/2000001 [17:11<266:36:55,  2.08it/s]

buffer size = 3892, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1747/2000001 [17:12<262:01:07,  2.12it/s]

buffer size = 3894, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1748/2000001 [17:12<262:05:19,  2.12it/s]

buffer size = 3896, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1749/2000001 [17:13<259:52:00,  2.14it/s]

buffer size = 3898, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1750/2000001 [17:13<260:17:29,  2.13it/s]

buffer size = 3900, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1751/2000001 [17:13<260:18:07,  2.13it/s]

buffer size = 3902, epsilon = 0.09913
mean_reward :  0.0


  0%|          | 1752/2000001 [17:14<260:18:06,  2.13it/s]

buffer size = 3904, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1753/2000001 [17:14<258:51:14,  2.14it/s]

buffer size = 3906, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1754/2000001 [17:15<257:28:36,  2.16it/s]

buffer size = 3908, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1755/2000001 [17:15<261:23:12,  2.12it/s]

buffer size = 3910, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1756/2000001 [17:16<314:27:44,  1.77it/s]

buffer size = 3912, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1757/2000001 [17:17<349:38:27,  1.59it/s]

buffer size = 3914, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1758/2000001 [17:18<374:17:28,  1.48it/s]

buffer size = 3916, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1759/2000001 [17:18<341:27:13,  1.63it/s]

buffer size = 3918, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1760/2000001 [17:19<315:34:35,  1.76it/s]

buffer size = 3920, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1761/2000001 [17:19<301:22:06,  1.84it/s]

buffer size = 3922, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1762/2000001 [17:20<292:06:46,  1.90it/s]

buffer size = 3924, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1763/2000001 [17:20<283:15:17,  1.96it/s]

buffer size = 3926, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1764/2000001 [17:21<276:44:28,  2.01it/s]

buffer size = 3928, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1765/2000001 [17:21<270:43:31,  2.05it/s]

buffer size = 3930, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1766/2000001 [17:21<269:36:25,  2.06it/s]

buffer size = 3932, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1767/2000001 [17:22<264:37:14,  2.10it/s]

buffer size = 3934, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1768/2000001 [17:22<264:30:52,  2.10it/s]

buffer size = 3936, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1769/2000001 [17:23<262:03:53,  2.12it/s]

buffer size = 3938, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1770/2000001 [17:23<262:50:22,  2.11it/s]

buffer size = 3940, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1771/2000001 [17:24<261:26:53,  2.12it/s]

buffer size = 3942, epsilon = 0.09912
mean_reward :  0.0


  0%|          | 1772/2000001 [17:24<262:15:07,  2.12it/s]

buffer size = 3944, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1773/2000001 [17:25<263:32:05,  2.11it/s]

buffer size = 3946, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1774/2000001 [17:25<261:28:02,  2.12it/s]

buffer size = 3948, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1775/2000001 [17:26<263:21:16,  2.11it/s]

buffer size = 3950, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1776/2000001 [17:26<261:48:58,  2.12it/s]

buffer size = 3952, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1777/2000001 [17:27<262:22:31,  2.12it/s]

buffer size = 3954, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1778/2000001 [17:27<260:43:52,  2.13it/s]

buffer size = 3956, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1779/2000001 [17:28<260:44:53,  2.13it/s]

buffer size = 3958, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1780/2000001 [17:28<308:22:19,  1.80it/s]

buffer size = 3960, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1781/2000001 [17:29<346:59:20,  1.60it/s]

buffer size = 3962, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1782/2000001 [17:30<374:10:23,  1.48it/s]

buffer size = 3964, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1783/2000001 [17:30<340:06:45,  1.63it/s]

buffer size = 3966, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1784/2000001 [17:31<316:59:09,  1.75it/s]

buffer size = 3968, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1785/2000001 [17:31<299:31:19,  1.85it/s]

buffer size = 3970, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1786/2000001 [17:32<287:13:50,  1.93it/s]

buffer size = 3972, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1787/2000001 [17:32<280:54:50,  1.98it/s]

buffer size = 3974, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1788/2000001 [17:33<275:12:43,  2.02it/s]

buffer size = 3976, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1789/2000001 [17:33<269:49:17,  2.06it/s]

buffer size = 3978, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1790/2000001 [17:34<265:40:11,  2.09it/s]

buffer size = 3980, epsilon = 0.09911
mean_reward :  0.0


  0%|          | 1791/2000001 [17:34<265:36:26,  2.09it/s]

buffer size = 3982, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1792/2000001 [17:35<265:37:43,  2.09it/s]

buffer size = 3984, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1793/2000001 [17:35<264:43:17,  2.10it/s]

buffer size = 3986, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1794/2000001 [17:36<261:20:51,  2.12it/s]

buffer size = 3988, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1795/2000001 [17:36<262:55:35,  2.11it/s]

buffer size = 3990, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1796/2000001 [17:37<264:29:32,  2.10it/s]

buffer size = 3992, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1797/2000001 [17:37<263:28:49,  2.11it/s]

buffer size = 3994, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1798/2000001 [17:37<262:00:52,  2.12it/s]

buffer size = 3996, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1799/2000001 [17:38<262:44:49,  2.11it/s]

buffer size = 3998, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1800/2000001 [17:38<262:03:47,  2.12it/s]

buffer size = 4000, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1801/2000001 [17:39<261:06:24,  2.13it/s]

buffer size = 4002, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1802/2000001 [17:39<264:19:09,  2.10it/s]

buffer size = 4004, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1803/2000001 [17:40<262:02:05,  2.12it/s]

buffer size = 4006, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1804/2000001 [17:41<312:47:38,  1.77it/s]

buffer size = 4008, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1805/2000001 [17:41<350:42:16,  1.58it/s]

buffer size = 4010, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1806/2000001 [17:42<370:54:55,  1.50it/s]

buffer size = 4012, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1807/2000001 [17:43<338:31:49,  1.64it/s]

buffer size = 4014, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1808/2000001 [17:43<317:16:44,  1.75it/s]

buffer size = 4016, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1809/2000001 [17:44<301:14:40,  1.84it/s]

buffer size = 4018, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1810/2000001 [17:44<289:48:11,  1.92it/s]

buffer size = 4020, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1811/2000001 [17:45<281:36:10,  1.97it/s]

buffer size = 4022, epsilon = 0.09910
mean_reward :  0.0


  0%|          | 1812/2000001 [17:45<274:45:45,  2.02it/s]

buffer size = 4024, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1813/2000001 [17:45<272:27:59,  2.04it/s]

buffer size = 4026, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1814/2000001 [17:46<272:00:24,  2.04it/s]

buffer size = 4028, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1815/2000001 [17:46<268:50:04,  2.06it/s]

buffer size = 4030, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1816/2000001 [17:47<266:51:59,  2.08it/s]

buffer size = 4032, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1817/2000001 [17:47<266:08:43,  2.09it/s]

buffer size = 4034, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1818/2000001 [17:48<262:28:49,  2.11it/s]

buffer size = 4036, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1819/2000001 [17:48<266:06:20,  2.09it/s]

buffer size = 4038, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1820/2000001 [17:49<264:29:31,  2.10it/s]

buffer size = 4040, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1821/2000001 [17:49<266:23:51,  2.08it/s]

buffer size = 4042, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1822/2000001 [17:50<267:57:29,  2.07it/s]

buffer size = 4044, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1823/2000001 [17:50<263:12:55,  2.11it/s]

buffer size = 4046, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1824/2000001 [17:51<263:33:11,  2.11it/s]

buffer size = 4048, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1825/2000001 [17:51<262:07:57,  2.12it/s]

buffer size = 4050, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1826/2000001 [17:52<265:24:50,  2.09it/s]

buffer size = 4052, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1827/2000001 [17:52<267:20:13,  2.08it/s]

buffer size = 4054, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1828/2000001 [17:53<317:41:41,  1.75it/s]

buffer size = 4056, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1829/2000001 [17:54<356:54:37,  1.56it/s]

buffer size = 4058, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1830/2000001 [17:54<372:11:01,  1.49it/s]

buffer size = 4060, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1831/2000001 [17:55<339:03:26,  1.64it/s]

buffer size = 4062, epsilon = 0.09909
mean_reward :  0.0


  0%|          | 1832/2000001 [17:55<314:05:57,  1.77it/s]

buffer size = 4064, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1833/2000001 [17:56<298:09:28,  1.86it/s]

buffer size = 4066, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1834/2000001 [17:56<286:06:40,  1.94it/s]

buffer size = 4068, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1835/2000001 [17:57<278:33:24,  1.99it/s]

buffer size = 4070, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1836/2000001 [17:57<273:07:38,  2.03it/s]

buffer size = 4072, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1837/2000001 [17:58<270:54:12,  2.05it/s]

buffer size = 4074, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1838/2000001 [17:58<266:43:17,  2.08it/s]

buffer size = 4076, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1839/2000001 [17:59<271:50:42,  2.04it/s]

buffer size = 4078, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1840/2000001 [17:59<268:19:32,  2.07it/s]

buffer size = 4080, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1841/2000001 [18:00<267:13:18,  2.08it/s]

buffer size = 4082, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1842/2000001 [18:00<265:57:51,  2.09it/s]

buffer size = 4084, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1843/2000001 [18:01<265:36:17,  2.09it/s]

buffer size = 4086, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1844/2000001 [18:01<263:31:41,  2.11it/s]

buffer size = 4088, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1845/2000001 [18:02<261:24:03,  2.12it/s]

buffer size = 4090, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1846/2000001 [18:02<263:13:03,  2.11it/s]

buffer size = 4092, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1847/2000001 [18:03<264:23:08,  2.10it/s]

buffer size = 4094, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1848/2000001 [18:03<265:10:11,  2.09it/s]

buffer size = 4096, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1849/2000001 [18:03<263:34:34,  2.11it/s]

buffer size = 4098, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1850/2000001 [18:04<264:55:09,  2.10it/s]

buffer size = 4100, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1851/2000001 [18:04<271:05:58,  2.05it/s]

buffer size = 4102, epsilon = 0.09908
mean_reward :  0.0


  0%|          | 1852/2000001 [18:05<322:30:31,  1.72it/s]

buffer size = 4104, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1853/2000001 [18:06<356:19:14,  1.56it/s]

buffer size = 4106, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1854/2000001 [18:07<373:54:15,  1.48it/s]

buffer size = 4108, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1855/2000001 [18:07<342:55:02,  1.62it/s]

buffer size = 4110, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1856/2000001 [18:08<318:06:27,  1.74it/s]

buffer size = 4112, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1857/2000001 [18:08<301:57:02,  1.84it/s]

buffer size = 4114, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1858/2000001 [18:09<291:07:06,  1.91it/s]

buffer size = 4116, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1859/2000001 [18:09<287:02:40,  1.93it/s]

buffer size = 4118, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1860/2000001 [18:10<281:27:21,  1.97it/s]

buffer size = 4120, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1861/2000001 [18:10<280:15:06,  1.98it/s]

buffer size = 4122, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1862/2000001 [18:11<275:27:48,  2.01it/s]

buffer size = 4124, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1863/2000001 [18:11<272:27:17,  2.04it/s]

buffer size = 4126, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1864/2000001 [18:12<268:46:01,  2.07it/s]

buffer size = 4128, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1865/2000001 [18:12<267:17:04,  2.08it/s]

buffer size = 4130, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1866/2000001 [18:13<264:27:42,  2.10it/s]

buffer size = 4132, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1867/2000001 [18:13<261:36:57,  2.12it/s]

buffer size = 4134, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1868/2000001 [18:13<260:20:40,  2.13it/s]

buffer size = 4136, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1869/2000001 [18:14<260:37:19,  2.13it/s]

buffer size = 4138, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1870/2000001 [18:14<262:11:58,  2.12it/s]

buffer size = 4140, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1871/2000001 [18:15<261:06:52,  2.13it/s]

buffer size = 4142, epsilon = 0.09907
mean_reward :  0.0


  0%|          | 1872/2000001 [18:15<265:01:38,  2.09it/s]

buffer size = 4144, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1873/2000001 [18:16<262:35:10,  2.11it/s]

buffer size = 4146, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1874/2000001 [18:16<266:28:40,  2.08it/s]

buffer size = 4148, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1875/2000001 [18:17<274:40:25,  2.02it/s]

buffer size = 4150, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1876/2000001 [18:18<325:39:39,  1.70it/s]

buffer size = 4152, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1877/2000001 [18:18<359:28:49,  1.54it/s]

buffer size = 4154, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1878/2000001 [18:19<365:24:24,  1.52it/s]

buffer size = 4156, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1879/2000001 [18:20<337:49:49,  1.64it/s]

buffer size = 4158, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1880/2000001 [18:20<315:57:29,  1.76it/s]

buffer size = 4160, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1881/2000001 [18:21<300:32:52,  1.85it/s]

buffer size = 4162, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1882/2000001 [18:21<289:16:23,  1.92it/s]

buffer size = 4164, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1883/2000001 [18:22<283:32:00,  1.96it/s]

buffer size = 4166, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1884/2000001 [18:22<278:06:50,  2.00it/s]

buffer size = 4168, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1885/2000001 [18:23<274:34:57,  2.02it/s]

buffer size = 4170, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1886/2000001 [18:23<271:02:37,  2.05it/s]

buffer size = 4172, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1887/2000001 [18:23<268:17:40,  2.07it/s]

buffer size = 4174, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1888/2000001 [18:24<266:45:54,  2.08it/s]

buffer size = 4176, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1889/2000001 [18:24<265:03:20,  2.09it/s]

buffer size = 4178, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1890/2000001 [18:25<266:28:59,  2.08it/s]

buffer size = 4180, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1891/2000001 [18:25<264:42:36,  2.10it/s]

buffer size = 4182, epsilon = 0.09906
mean_reward :  0.0


  0%|          | 1892/2000001 [18:26<265:03:14,  2.09it/s]

buffer size = 4184, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1893/2000001 [18:26<263:34:31,  2.11it/s]

buffer size = 4186, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1894/2000001 [18:27<264:52:10,  2.10it/s]

buffer size = 4188, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1895/2000001 [18:27<264:26:17,  2.10it/s]

buffer size = 4190, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1896/2000001 [18:28<264:56:58,  2.09it/s]

buffer size = 4192, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1897/2000001 [18:28<264:13:22,  2.10it/s]

buffer size = 4194, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1898/2000001 [18:29<265:45:29,  2.09it/s]

buffer size = 4196, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1899/2000001 [18:29<288:49:37,  1.92it/s]

buffer size = 4198, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1900/2000001 [18:30<339:49:48,  1.63it/s]

buffer size = 4200, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1901/2000001 [18:31<374:22:40,  1.48it/s]

buffer size = 4202, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1902/2000001 [18:32<356:05:33,  1.56it/s]

buffer size = 4204, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1903/2000001 [18:32<329:03:34,  1.69it/s]

buffer size = 4206, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1904/2000001 [18:32<308:26:04,  1.80it/s]

buffer size = 4208, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1905/2000001 [18:33<296:36:02,  1.87it/s]

buffer size = 4210, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1906/2000001 [18:33<286:08:11,  1.94it/s]

buffer size = 4212, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1907/2000001 [18:34<282:42:51,  1.96it/s]

buffer size = 4214, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1908/2000001 [18:34<278:36:36,  1.99it/s]

buffer size = 4216, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1909/2000001 [18:35<276:02:49,  2.01it/s]

buffer size = 4218, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1910/2000001 [18:35<271:03:49,  2.05it/s]

buffer size = 4220, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1911/2000001 [18:36<265:40:22,  2.09it/s]

buffer size = 4222, epsilon = 0.09905
mean_reward :  0.0


  0%|          | 1912/2000001 [18:36<266:37:18,  2.08it/s]

buffer size = 4224, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1913/2000001 [18:37<267:41:51,  2.07it/s]

buffer size = 4226, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1914/2000001 [18:37<267:41:55,  2.07it/s]

buffer size = 4228, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1915/2000001 [18:38<264:45:52,  2.10it/s]

buffer size = 4230, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1916/2000001 [18:38<265:46:15,  2.09it/s]

buffer size = 4232, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1917/2000001 [18:39<265:27:47,  2.09it/s]

buffer size = 4234, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1918/2000001 [18:39<268:54:26,  2.06it/s]

buffer size = 4236, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1919/2000001 [18:40<267:49:06,  2.07it/s]

buffer size = 4238, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1920/2000001 [18:40<268:46:27,  2.07it/s]

buffer size = 4240, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1921/2000001 [18:41<266:19:39,  2.08it/s]

buffer size = 4242, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1922/2000001 [18:41<268:48:03,  2.06it/s]

buffer size = 4244, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1923/2000001 [18:42<306:54:19,  1.81it/s]

buffer size = 4246, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1924/2000001 [18:43<348:51:51,  1.59it/s]

buffer size = 4248, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1925/2000001 [18:43<379:37:40,  1.46it/s]

buffer size = 4250, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1926/2000001 [18:44<354:01:13,  1.57it/s]

buffer size = 4252, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1927/2000001 [18:44<329:31:44,  1.68it/s]

buffer size = 4254, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1928/2000001 [18:45<308:55:21,  1.80it/s]

buffer size = 4256, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1929/2000001 [18:45<298:29:10,  1.86it/s]

buffer size = 4258, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1930/2000001 [18:46<289:22:09,  1.92it/s]

buffer size = 4260, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1931/2000001 [18:46<287:20:07,  1.93it/s]

buffer size = 4262, epsilon = 0.09904
mean_reward :  0.0


  0%|          | 1932/2000001 [18:47<279:31:22,  1.99it/s]

buffer size = 4264, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1933/2000001 [18:47<275:24:02,  2.02it/s]

buffer size = 4266, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1934/2000001 [18:48<272:16:05,  2.04it/s]

buffer size = 4268, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1935/2000001 [18:48<272:59:59,  2.03it/s]

buffer size = 4270, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1936/2000001 [18:49<270:43:07,  2.05it/s]

buffer size = 4272, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1937/2000001 [18:49<274:04:13,  2.03it/s]

buffer size = 4274, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1938/2000001 [18:50<270:08:30,  2.05it/s]

buffer size = 4276, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1939/2000001 [18:50<266:55:16,  2.08it/s]

buffer size = 4278, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1940/2000001 [18:51<268:56:11,  2.06it/s]

buffer size = 4280, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1941/2000001 [18:51<270:52:56,  2.05it/s]

buffer size = 4282, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1942/2000001 [18:52<268:32:52,  2.07it/s]

buffer size = 4284, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1943/2000001 [18:52<266:46:26,  2.08it/s]

buffer size = 4286, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1944/2000001 [18:53<266:50:40,  2.08it/s]

buffer size = 4288, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1945/2000001 [18:53<268:34:53,  2.07it/s]

buffer size = 4290, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1946/2000001 [18:54<270:10:25,  2.05it/s]

buffer size = 4292, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1947/2000001 [18:54<322:26:39,  1.72it/s]

buffer size = 4294, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1948/2000001 [18:55<362:47:00,  1.53it/s]

buffer size = 4296, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1949/2000001 [18:56<381:50:53,  1.45it/s]

buffer size = 4298, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1950/2000001 [18:57<345:49:52,  1.60it/s]

buffer size = 4300, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1951/2000001 [18:57<324:21:56,  1.71it/s]

buffer size = 4302, epsilon = 0.09903
mean_reward :  0.0


  0%|          | 1952/2000001 [18:58<304:46:55,  1.82it/s]

buffer size = 4304, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1953/2000001 [18:58<293:57:23,  1.89it/s]

buffer size = 4306, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1954/2000001 [18:58<285:36:40,  1.94it/s]

buffer size = 4308, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1955/2000001 [18:59<285:07:02,  1.95it/s]

buffer size = 4310, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1956/2000001 [18:59<279:03:50,  1.99it/s]

buffer size = 4312, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1957/2000001 [19:00<274:42:15,  2.02it/s]

buffer size = 4314, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1958/2000001 [19:00<271:10:15,  2.05it/s]

buffer size = 4316, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1959/2000001 [19:01<270:42:24,  2.05it/s]

buffer size = 4318, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1960/2000001 [19:01<267:39:08,  2.07it/s]

buffer size = 4320, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1961/2000001 [19:02<267:26:36,  2.08it/s]

buffer size = 4322, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1962/2000001 [19:02<267:35:30,  2.07it/s]

buffer size = 4324, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1963/2000001 [19:03<265:04:24,  2.09it/s]

buffer size = 4326, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1964/2000001 [19:03<266:51:00,  2.08it/s]

buffer size = 4328, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1965/2000001 [19:04<264:19:29,  2.10it/s]

buffer size = 4330, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1966/2000001 [19:04<268:13:44,  2.07it/s]

buffer size = 4332, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1967/2000001 [19:05<267:32:55,  2.07it/s]

buffer size = 4334, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1968/2000001 [19:05<270:34:03,  2.05it/s]

buffer size = 4336, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1969/2000001 [19:06<268:18:12,  2.07it/s]

buffer size = 4338, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1970/2000001 [19:06<301:36:29,  1.84it/s]

buffer size = 4340, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1971/2000001 [19:07<352:53:08,  1.57it/s]

buffer size = 4342, epsilon = 0.09902
mean_reward :  0.0


  0%|          | 1972/2000001 [19:08<379:00:33,  1.46it/s]

buffer size = 4344, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1973/2000001 [19:09<353:33:59,  1.57it/s]

buffer size = 4346, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1974/2000001 [19:09<326:28:36,  1.70it/s]

buffer size = 4348, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1975/2000001 [19:10<309:46:26,  1.79it/s]

buffer size = 4350, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1976/2000001 [19:10<294:31:54,  1.88it/s]

buffer size = 4352, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1977/2000001 [19:10<287:22:34,  1.93it/s]

buffer size = 4354, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1978/2000001 [19:11<278:45:57,  1.99it/s]

buffer size = 4356, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1979/2000001 [19:11<275:31:32,  2.01it/s]

buffer size = 4358, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1980/2000001 [19:12<270:42:29,  2.05it/s]

buffer size = 4360, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1981/2000001 [19:12<271:14:07,  2.05it/s]

buffer size = 4362, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1982/2000001 [19:13<267:28:03,  2.08it/s]

buffer size = 4364, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1983/2000001 [19:13<266:04:43,  2.09it/s]

buffer size = 4366, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1984/2000001 [19:14<266:24:04,  2.08it/s]

buffer size = 4368, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1985/2000001 [19:14<267:08:39,  2.08it/s]

buffer size = 4370, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1986/2000001 [19:15<265:00:03,  2.09it/s]

buffer size = 4372, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1987/2000001 [19:15<265:43:47,  2.09it/s]

buffer size = 4374, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1988/2000001 [19:16<266:08:13,  2.09it/s]

buffer size = 4376, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1989/2000001 [19:16<268:10:15,  2.07it/s]

buffer size = 4378, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1990/2000001 [19:17<268:46:44,  2.06it/s]

buffer size = 4380, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1991/2000001 [19:17<266:55:46,  2.08it/s]

buffer size = 4382, epsilon = 0.09901
mean_reward :  0.0


  0%|          | 1992/2000001 [19:18<265:47:06,  2.09it/s]

buffer size = 4384, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 1993/2000001 [19:18<265:15:15,  2.09it/s]

buffer size = 4386, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 1994/2000001 [19:19<308:07:25,  1.80it/s]

buffer size = 4388, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 1995/2000001 [19:20<358:34:01,  1.55it/s]

buffer size = 4390, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 1996/2000001 [19:20<380:07:47,  1.46it/s]

buffer size = 4392, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 1997/2000001 [19:21<345:19:26,  1.61it/s]

buffer size = 4394, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 1998/2000001 [19:21<323:43:58,  1.71it/s]

buffer size = 4396, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 1999/2000001 [19:22<308:20:07,  1.80it/s]

buffer size = 4398, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2000/2000001 [19:22<294:50:44,  1.88it/s]

buffer size = 4400, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2001/2000001 [19:23<288:06:40,  1.93it/s]

buffer size = 4402, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2002/2000001 [19:23<279:11:10,  1.99it/s]

buffer size = 4404, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2003/2000001 [19:24<275:45:21,  2.01it/s]

buffer size = 4406, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2004/2000001 [19:24<273:18:24,  2.03it/s]

buffer size = 4408, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2005/2000001 [19:25<272:15:41,  2.04it/s]

buffer size = 4410, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2006/2000001 [19:25<269:38:48,  2.06it/s]

buffer size = 4412, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2007/2000001 [19:26<271:18:08,  2.05it/s]

buffer size = 4414, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2008/2000001 [19:26<269:10:54,  2.06it/s]

buffer size = 4416, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2009/2000001 [19:27<297:59:22,  1.86it/s]

buffer size = 4418, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2010/2000001 [19:28<303:03:05,  1.83it/s]

buffer size = 4420, epsilon = 0.09900
mean_reward :  0.0


  0%|          | 2011/2000001 [19:28<296:14:17,  1.87it/s]

buffer size = 4422, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2012/2000001 [19:29<289:12:33,  1.92it/s]

buffer size = 4424, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2013/2000001 [19:29<285:58:24,  1.94it/s]

buffer size = 4426, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2014/2000001 [19:29<278:17:57,  1.99it/s]

buffer size = 4428, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2015/2000001 [19:30<277:44:02,  2.00it/s]

buffer size = 4430, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2016/2000001 [19:30<273:30:46,  2.03it/s]

buffer size = 4432, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2017/2000001 [19:31<323:39:56,  1.71it/s]

buffer size = 4434, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2018/2000001 [19:32<367:19:47,  1.51it/s]

buffer size = 4436, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2019/2000001 [19:33<384:47:08,  1.44it/s]

buffer size = 4438, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2020/2000001 [19:33<349:57:53,  1.59it/s]

buffer size = 4440, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2021/2000001 [19:34<325:30:51,  1.70it/s]

buffer size = 4442, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2022/2000001 [19:34<310:04:32,  1.79it/s]

buffer size = 4444, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2023/2000001 [19:35<298:20:38,  1.86it/s]

buffer size = 4446, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2024/2000001 [19:35<288:34:08,  1.92it/s]

buffer size = 4448, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2025/2000001 [19:36<280:35:07,  1.98it/s]

buffer size = 4450, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2026/2000001 [19:36<280:03:53,  1.98it/s]

buffer size = 4452, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2027/2000001 [19:37<279:50:34,  1.98it/s]

buffer size = 4454, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2028/2000001 [19:37<277:07:13,  2.00it/s]

buffer size = 4456, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2029/2000001 [19:38<275:09:13,  2.02it/s]

buffer size = 4458, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2030/2000001 [19:38<276:25:00,  2.01it/s]

buffer size = 4460, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2031/2000001 [19:39<273:11:05,  2.03it/s]

buffer size = 4462, epsilon = 0.09899
mean_reward :  0.0


  0%|          | 2032/2000001 [19:39<275:01:58,  2.02it/s]

buffer size = 4464, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2033/2000001 [19:40<273:00:20,  2.03it/s]

buffer size = 4466, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2034/2000001 [19:40<273:08:34,  2.03it/s]

buffer size = 4468, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2035/2000001 [19:41<271:55:19,  2.04it/s]

buffer size = 4470, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2036/2000001 [19:41<271:36:25,  2.04it/s]

buffer size = 4472, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2037/2000001 [19:42<271:40:13,  2.04it/s]

buffer size = 4474, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2038/2000001 [19:42<271:33:31,  2.04it/s]

buffer size = 4476, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2039/2000001 [19:43<272:36:57,  2.04it/s]

buffer size = 4478, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2040/2000001 [19:43<316:36:46,  1.75it/s]

buffer size = 4480, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2041/2000001 [19:44<354:18:13,  1.57it/s]

buffer size = 4482, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2042/2000001 [19:45<382:11:39,  1.45it/s]

buffer size = 4484, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2043/2000001 [19:46<357:05:12,  1.55it/s]

buffer size = 4486, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2044/2000001 [19:46<329:26:34,  1.68it/s]

buffer size = 4488, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2045/2000001 [19:47<312:59:18,  1.77it/s]

buffer size = 4490, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2046/2000001 [19:47<299:19:06,  1.85it/s]

buffer size = 4492, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2047/2000001 [19:47<288:08:10,  1.93it/s]

buffer size = 4494, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2048/2000001 [19:48<283:37:17,  1.96it/s]

buffer size = 4496, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2049/2000001 [19:48<279:05:59,  1.99it/s]

buffer size = 4498, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2050/2000001 [19:49<276:23:56,  2.01it/s]

buffer size = 4500, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2051/2000001 [19:49<276:07:07,  2.01it/s]

buffer size = 4502, epsilon = 0.09898
mean_reward :  0.0


  0%|          | 2052/2000001 [19:50<274:04:34,  2.02it/s]

buffer size = 4504, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2053/2000001 [19:50<271:23:05,  2.05it/s]

buffer size = 4506, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2054/2000001 [19:51<270:01:16,  2.06it/s]

buffer size = 4508, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2055/2000001 [19:51<270:24:34,  2.05it/s]

buffer size = 4510, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2056/2000001 [19:52<271:30:14,  2.04it/s]

buffer size = 4512, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2057/2000001 [19:52<271:10:24,  2.05it/s]

buffer size = 4514, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2058/2000001 [19:53<270:52:45,  2.05it/s]

buffer size = 4516, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2059/2000001 [19:53<269:52:14,  2.06it/s]

buffer size = 4518, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2060/2000001 [19:54<270:17:42,  2.05it/s]

buffer size = 4520, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2061/2000001 [19:54<272:44:50,  2.03it/s]

buffer size = 4522, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2062/2000001 [19:55<275:33:22,  2.01it/s]

buffer size = 4524, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2063/2000001 [19:55<279:10:56,  1.99it/s]

buffer size = 4526, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2064/2000001 [19:56<328:21:38,  1.69it/s]

buffer size = 4528, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2065/2000001 [19:57<367:02:13,  1.51it/s]

buffer size = 4530, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2066/2000001 [19:58<372:57:36,  1.49it/s]

buffer size = 4532, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2067/2000001 [19:58<344:53:22,  1.61it/s]

buffer size = 4534, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2068/2000001 [19:59<323:49:15,  1.71it/s]

buffer size = 4536, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2069/2000001 [19:59<308:08:39,  1.80it/s]

buffer size = 4538, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2070/2000001 [20:00<295:06:08,  1.88it/s]

buffer size = 4540, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2071/2000001 [20:00<289:33:28,  1.92it/s]

buffer size = 4542, epsilon = 0.09897
mean_reward :  0.0


  0%|          | 2072/2000001 [20:01<281:14:47,  1.97it/s]

buffer size = 4544, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2073/2000001 [20:01<277:52:32,  2.00it/s]

buffer size = 4546, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2074/2000001 [20:02<273:15:24,  2.03it/s]

buffer size = 4548, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2075/2000001 [20:02<273:16:38,  2.03it/s]

buffer size = 4550, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2076/2000001 [20:03<271:07:53,  2.05it/s]

buffer size = 4552, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2077/2000001 [20:03<268:41:38,  2.07it/s]

buffer size = 4554, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2078/2000001 [20:03<272:32:32,  2.04it/s]

buffer size = 4556, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2079/2000001 [20:04<270:14:06,  2.05it/s]

buffer size = 4558, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2080/2000001 [20:04<271:55:25,  2.04it/s]

buffer size = 4560, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2081/2000001 [20:05<269:17:21,  2.06it/s]

buffer size = 4562, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2082/2000001 [20:05<270:36:47,  2.05it/s]

buffer size = 4564, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2083/2000001 [20:06<269:21:36,  2.06it/s]

buffer size = 4566, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2084/2000001 [20:06<270:38:02,  2.05it/s]

buffer size = 4568, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2085/2000001 [20:07<268:42:13,  2.07it/s]

buffer size = 4570, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2086/2000001 [20:07<271:16:12,  2.05it/s]

buffer size = 4572, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2087/2000001 [20:08<315:41:34,  1.76it/s]

buffer size = 4574, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2088/2000001 [20:09<356:33:23,  1.56it/s]

buffer size = 4576, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2089/2000001 [20:10<387:24:43,  1.43it/s]

buffer size = 4578, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2090/2000001 [20:10<351:17:10,  1.58it/s]

buffer size = 4580, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2091/2000001 [20:11<329:02:54,  1.69it/s]

buffer size = 4582, epsilon = 0.09896
mean_reward :  0.0


  0%|          | 2092/2000001 [20:11<310:21:13,  1.79it/s]

buffer size = 4584, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2093/2000001 [20:12<298:27:02,  1.86it/s]

buffer size = 4586, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2094/2000001 [20:12<287:41:31,  1.93it/s]

buffer size = 4588, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2095/2000001 [20:13<286:38:25,  1.94it/s]

buffer size = 4590, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2096/2000001 [20:13<280:03:59,  1.98it/s]

buffer size = 4592, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2097/2000001 [20:14<278:54:40,  1.99it/s]

buffer size = 4594, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2098/2000001 [20:14<273:42:59,  2.03it/s]

buffer size = 4596, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2099/2000001 [20:15<274:18:50,  2.02it/s]

buffer size = 4598, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2100/2000001 [20:15<273:15:08,  2.03it/s]

buffer size = 4600, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2101/2000001 [20:16<273:54:26,  2.03it/s]

buffer size = 4602, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2102/2000001 [20:16<271:45:10,  2.04it/s]

buffer size = 4604, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2103/2000001 [20:17<277:20:28,  2.00it/s]

buffer size = 4606, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2104/2000001 [20:17<273:02:11,  2.03it/s]

buffer size = 4608, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2105/2000001 [20:18<272:53:39,  2.03it/s]

buffer size = 4610, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2106/2000001 [20:18<273:02:34,  2.03it/s]

buffer size = 4612, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2107/2000001 [20:19<273:32:22,  2.03it/s]

buffer size = 4614, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2108/2000001 [20:19<271:34:01,  2.04it/s]

buffer size = 4616, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2109/2000001 [20:20<274:56:12,  2.02it/s]

buffer size = 4618, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2110/2000001 [20:20<317:41:59,  1.75it/s]

buffer size = 4620, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2111/2000001 [20:21<358:49:58,  1.55it/s]

buffer size = 4622, epsilon = 0.09895
mean_reward :  0.0


  0%|          | 2112/2000001 [20:22<390:27:15,  1.42it/s]

buffer size = 4624, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2113/2000001 [20:22<352:50:55,  1.57it/s]

buffer size = 4626, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2114/2000001 [20:23<331:03:11,  1.68it/s]

buffer size = 4628, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2115/2000001 [20:23<312:52:19,  1.77it/s]

buffer size = 4630, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2116/2000001 [20:24<300:25:14,  1.85it/s]

buffer size = 4632, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2117/2000001 [20:24<296:34:31,  1.87it/s]

buffer size = 4634, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2118/2000001 [20:25<291:38:51,  1.90it/s]

buffer size = 4636, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2119/2000001 [20:25<282:44:08,  1.96it/s]

buffer size = 4638, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2120/2000001 [20:26<279:53:41,  1.98it/s]

buffer size = 4640, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2121/2000001 [20:26<278:29:08,  1.99it/s]

buffer size = 4642, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2122/2000001 [20:27<275:08:57,  2.02it/s]

buffer size = 4644, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2123/2000001 [20:27<274:29:15,  2.02it/s]

buffer size = 4646, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2124/2000001 [20:28<269:39:46,  2.06it/s]

buffer size = 4648, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2125/2000001 [20:28<272:32:55,  2.04it/s]

buffer size = 4650, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2126/2000001 [20:29<270:28:15,  2.05it/s]

buffer size = 4652, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2127/2000001 [20:29<270:04:29,  2.05it/s]

buffer size = 4654, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2128/2000001 [20:30<268:04:13,  2.07it/s]

buffer size = 4656, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2129/2000001 [20:30<270:42:24,  2.05it/s]

buffer size = 4658, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2130/2000001 [20:31<268:38:37,  2.07it/s]

buffer size = 4660, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2131/2000001 [20:31<267:05:58,  2.08it/s]

buffer size = 4662, epsilon = 0.09894
mean_reward :  0.0


  0%|          | 2132/2000001 [20:32<263:59:53,  2.10it/s]

buffer size = 4664, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2133/2000001 [20:32<298:50:38,  1.86it/s]

buffer size = 4666, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2134/2000001 [20:33<343:12:35,  1.62it/s]

buffer size = 4668, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2135/2000001 [20:34<375:56:46,  1.48it/s]

buffer size = 4670, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2136/2000001 [20:35<355:05:17,  1.56it/s]

buffer size = 4672, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2137/2000001 [20:35<326:32:52,  1.70it/s]

buffer size = 4674, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2138/2000001 [20:36<311:00:31,  1.78it/s]

buffer size = 4676, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2139/2000001 [20:36<296:52:31,  1.87it/s]

buffer size = 4678, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2140/2000001 [20:37<293:34:16,  1.89it/s]

buffer size = 4680, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2141/2000001 [20:37<285:50:02,  1.94it/s]

buffer size = 4682, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2142/2000001 [20:37<278:28:30,  1.99it/s]

buffer size = 4684, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2143/2000001 [20:38<271:48:56,  2.04it/s]

buffer size = 4686, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2144/2000001 [20:38<268:49:40,  2.06it/s]

buffer size = 4688, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2145/2000001 [20:39<264:39:17,  2.10it/s]

buffer size = 4690, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2146/2000001 [20:39<262:52:53,  2.11it/s]

buffer size = 4692, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2147/2000001 [20:40<264:08:21,  2.10it/s]

buffer size = 4694, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2148/2000001 [20:40<269:40:22,  2.06it/s]

buffer size = 4696, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2149/2000001 [20:41<271:00:12,  2.05it/s]

buffer size = 4698, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2150/2000001 [20:41<268:38:08,  2.07it/s]

buffer size = 4700, epsilon = 0.09893
mean_reward :  0.0


  0%|          | 2151/2000001 [20:42<273:06:55,  2.03it/s]

buffer size = 4702, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2152/2000001 [20:42<269:49:38,  2.06it/s]

buffer size = 4704, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2153/2000001 [20:43<270:49:47,  2.05it/s]

buffer size = 4706, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2154/2000001 [20:43<267:05:25,  2.08it/s]

buffer size = 4708, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2155/2000001 [20:44<267:04:56,  2.08it/s]

buffer size = 4710, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2156/2000001 [20:44<264:35:50,  2.10it/s]

buffer size = 4712, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2157/2000001 [20:45<314:28:42,  1.76it/s]

buffer size = 4714, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2158/2000001 [20:46<356:43:26,  1.56it/s]

buffer size = 4716, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2159/2000001 [20:47<375:18:59,  1.48it/s]

buffer size = 4718, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2160/2000001 [20:47<345:55:16,  1.60it/s]

buffer size = 4720, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2161/2000001 [20:48<317:51:55,  1.75it/s]

buffer size = 4722, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2162/2000001 [20:48<300:21:29,  1.85it/s]

buffer size = 4724, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2163/2000001 [20:48<287:15:18,  1.93it/s]

buffer size = 4726, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2164/2000001 [20:49<279:58:44,  1.98it/s]

buffer size = 4728, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2165/2000001 [20:49<273:11:44,  2.03it/s]

buffer size = 4730, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2166/2000001 [20:50<269:16:44,  2.06it/s]

buffer size = 4732, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2167/2000001 [20:50<268:03:12,  2.07it/s]

buffer size = 4734, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2168/2000001 [20:51<269:08:36,  2.06it/s]

buffer size = 4736, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2169/2000001 [20:51<272:14:32,  2.04it/s]

buffer size = 4738, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2170/2000001 [20:52<270:46:57,  2.05it/s]

buffer size = 4740, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2171/2000001 [20:52<272:55:26,  2.03it/s]

buffer size = 4742, epsilon = 0.09892
mean_reward :  0.0


  0%|          | 2172/2000001 [20:53<271:28:04,  2.04it/s]

buffer size = 4744, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2173/2000001 [20:53<274:18:41,  2.02it/s]

buffer size = 4746, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2174/2000001 [20:54<272:31:24,  2.04it/s]

buffer size = 4748, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2175/2000001 [20:54<277:02:53,  2.00it/s]

buffer size = 4750, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2176/2000001 [20:55<273:06:16,  2.03it/s]

buffer size = 4752, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2177/2000001 [20:55<270:27:11,  2.05it/s]

buffer size = 4754, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2178/2000001 [20:56<267:39:11,  2.07it/s]

buffer size = 4756, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2179/2000001 [20:56<267:58:43,  2.07it/s]

buffer size = 4758, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2180/2000001 [20:57<296:06:34,  1.87it/s]

buffer size = 4760, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2181/2000001 [20:58<340:14:11,  1.63it/s]

buffer size = 4762, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2182/2000001 [20:58<367:23:15,  1.51it/s]

buffer size = 4764, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2183/2000001 [20:59<367:26:55,  1.51it/s]

buffer size = 4766, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2184/2000001 [21:00<339:16:19,  1.64it/s]

buffer size = 4768, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2185/2000001 [21:00<317:53:41,  1.75it/s]

buffer size = 4770, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2186/2000001 [21:01<304:11:44,  1.82it/s]

buffer size = 4772, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2187/2000001 [21:01<293:05:36,  1.89it/s]

buffer size = 4774, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2188/2000001 [21:02<286:04:50,  1.94it/s]

buffer size = 4776, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2189/2000001 [21:02<281:37:26,  1.97it/s]

buffer size = 4778, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2190/2000001 [21:03<279:11:37,  1.99it/s]

buffer size = 4780, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2191/2000001 [21:03<276:48:53,  2.00it/s]

buffer size = 4782, epsilon = 0.09891
mean_reward :  0.0


  0%|          | 2192/2000001 [21:03<276:00:24,  2.01it/s]

buffer size = 4784, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2193/2000001 [21:04<274:00:42,  2.03it/s]

buffer size = 4786, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2194/2000001 [21:05<284:34:59,  1.95it/s]

buffer size = 4788, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2195/2000001 [21:05<281:07:42,  1.97it/s]

buffer size = 4790, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2196/2000001 [21:06<279:38:22,  1.98it/s]

buffer size = 4792, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2197/2000001 [21:06<276:21:14,  2.01it/s]

buffer size = 4794, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2198/2000001 [21:07<278:49:37,  1.99it/s]

buffer size = 4796, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2199/2000001 [21:07<278:13:09,  1.99it/s]

buffer size = 4798, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2200/2000001 [21:08<277:28:21,  2.00it/s]

buffer size = 4800, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2201/2000001 [21:08<274:12:58,  2.02it/s]

buffer size = 4802, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2202/2000001 [21:08<274:25:02,  2.02it/s]

buffer size = 4804, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2203/2000001 [21:09<284:33:59,  1.95it/s]

buffer size = 4806, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2204/2000001 [21:10<346:48:30,  1.60it/s]

buffer size = 4808, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2205/2000001 [21:11<385:10:38,  1.44it/s]

buffer size = 4810, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2206/2000001 [21:11<370:46:15,  1.50it/s]

buffer size = 4812, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2207/2000001 [21:12<343:37:11,  1.61it/s]

buffer size = 4814, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2208/2000001 [21:12<322:40:35,  1.72it/s]

buffer size = 4816, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2209/2000001 [21:13<307:57:10,  1.80it/s]

buffer size = 4818, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2210/2000001 [21:13<297:03:09,  1.87it/s]

buffer size = 4820, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2211/2000001 [21:14<292:02:39,  1.90it/s]

buffer size = 4822, epsilon = 0.09890
mean_reward :  0.0


  0%|          | 2212/2000001 [21:14<283:30:37,  1.96it/s]

buffer size = 4824, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2213/2000001 [21:15<281:40:48,  1.97it/s]

buffer size = 4826, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2214/2000001 [21:15<276:54:52,  2.00it/s]

buffer size = 4828, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2215/2000001 [21:16<277:25:05,  2.00it/s]

buffer size = 4830, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2216/2000001 [21:16<274:07:33,  2.02it/s]

buffer size = 4832, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2217/2000001 [21:17<277:50:56,  2.00it/s]

buffer size = 4834, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2218/2000001 [21:17<274:40:41,  2.02it/s]

buffer size = 4836, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2219/2000001 [21:18<275:42:47,  2.01it/s]

buffer size = 4838, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2220/2000001 [21:18<271:44:36,  2.04it/s]

buffer size = 4840, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2221/2000001 [21:19<272:46:25,  2.03it/s]

buffer size = 4842, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2222/2000001 [21:19<271:42:15,  2.04it/s]

buffer size = 4844, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2223/2000001 [21:20<273:25:30,  2.03it/s]

buffer size = 4846, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2224/2000001 [21:20<272:14:10,  2.04it/s]

buffer size = 4848, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2225/2000001 [21:21<270:38:04,  2.05it/s]

buffer size = 4850, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2226/2000001 [21:21<285:57:11,  1.94it/s]

buffer size = 4852, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2227/2000001 [21:22<338:45:12,  1.64it/s]

buffer size = 4854, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2228/2000001 [21:23<369:52:45,  1.50it/s]

buffer size = 4856, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2229/2000001 [21:24<377:49:30,  1.47it/s]

buffer size = 4858, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2230/2000001 [21:24<351:44:04,  1.58it/s]

buffer size = 4860, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2231/2000001 [21:25<329:08:30,  1.69it/s]

buffer size = 4862, epsilon = 0.09889
mean_reward :  0.0


  0%|          | 2232/2000001 [21:25<312:30:51,  1.78it/s]

buffer size = 4864, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2233/2000001 [21:26<299:06:41,  1.86it/s]

buffer size = 4866, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2234/2000001 [21:26<293:10:44,  1.89it/s]

buffer size = 4868, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2235/2000001 [21:27<285:39:39,  1.94it/s]

buffer size = 4870, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2236/2000001 [21:27<283:28:51,  1.96it/s]

buffer size = 4872, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2237/2000001 [21:28<279:48:55,  1.98it/s]

buffer size = 4874, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2238/2000001 [21:28<278:10:07,  1.99it/s]

buffer size = 4876, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2239/2000001 [21:29<282:50:16,  1.96it/s]

buffer size = 4878, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2240/2000001 [21:29<279:08:26,  1.99it/s]

buffer size = 4880, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2241/2000001 [21:30<280:19:21,  1.98it/s]

buffer size = 4882, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2242/2000001 [21:30<277:42:02,  2.00it/s]

buffer size = 4884, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2243/2000001 [21:31<278:25:37,  1.99it/s]

buffer size = 4886, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2244/2000001 [21:31<275:30:09,  2.01it/s]

buffer size = 4888, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2245/2000001 [21:32<275:48:04,  2.01it/s]

buffer size = 4890, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2246/2000001 [21:32<275:29:48,  2.01it/s]

buffer size = 4892, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2247/2000001 [21:33<275:18:38,  2.02it/s]

buffer size = 4894, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2248/2000001 [21:33<272:47:54,  2.03it/s]

buffer size = 4896, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2249/2000001 [21:34<281:27:01,  1.97it/s]

buffer size = 4898, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2250/2000001 [21:34<337:41:05,  1.64it/s]

buffer size = 4900, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2251/2000001 [21:35<364:35:41,  1.52it/s]

buffer size = 4902, epsilon = 0.09888
mean_reward :  0.0


  0%|          | 2252/2000001 [21:36<377:12:06,  1.47it/s]

buffer size = 4904, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2253/2000001 [21:36<346:04:34,  1.60it/s]

buffer size = 4906, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2254/2000001 [21:37<328:03:39,  1.69it/s]

buffer size = 4908, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2255/2000001 [21:37<310:47:24,  1.79it/s]

buffer size = 4910, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2256/2000001 [21:38<300:29:01,  1.85it/s]

buffer size = 4912, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2257/2000001 [21:38<291:53:02,  1.90it/s]

buffer size = 4914, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2258/2000001 [21:39<290:24:42,  1.91it/s]

buffer size = 4916, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2259/2000001 [21:39<285:01:12,  1.95it/s]

buffer size = 4918, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2260/2000001 [21:40<282:16:35,  1.97it/s]

buffer size = 4920, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2261/2000001 [21:40<280:41:24,  1.98it/s]

buffer size = 4922, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2262/2000001 [21:41<278:46:20,  1.99it/s]

buffer size = 4924, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2263/2000001 [21:41<276:15:58,  2.01it/s]

buffer size = 4926, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2264/2000001 [21:42<275:44:17,  2.01it/s]

buffer size = 4928, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2265/2000001 [21:42<274:19:12,  2.02it/s]

buffer size = 4930, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2266/2000001 [21:43<275:02:06,  2.02it/s]

buffer size = 4932, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2267/2000001 [21:43<272:52:43,  2.03it/s]

buffer size = 4934, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2268/2000001 [21:44<273:42:29,  2.03it/s]

buffer size = 4936, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2269/2000001 [21:44<271:59:32,  2.04it/s]

buffer size = 4938, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2270/2000001 [21:45<272:57:06,  2.03it/s]

buffer size = 4940, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2271/2000001 [21:45<273:44:14,  2.03it/s]

buffer size = 4942, epsilon = 0.09887
mean_reward :  0.0


  0%|          | 2272/2000001 [21:46<271:56:53,  2.04it/s]

buffer size = 4944, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2273/2000001 [21:47<323:11:58,  1.72it/s]

buffer size = 4946, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2274/2000001 [21:48<362:59:45,  1.53it/s]

buffer size = 4948, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2275/2000001 [21:48<385:32:37,  1.44it/s]

buffer size = 4950, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2276/2000001 [21:49<351:18:12,  1.58it/s]

buffer size = 4952, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2277/2000001 [21:49<329:40:38,  1.68it/s]

buffer size = 4954, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2278/2000001 [21:50<314:20:19,  1.77it/s]

buffer size = 4956, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2279/2000001 [21:50<300:43:19,  1.85it/s]

buffer size = 4958, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2280/2000001 [21:51<292:56:26,  1.89it/s]

buffer size = 4960, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2281/2000001 [21:51<290:28:37,  1.91it/s]

buffer size = 4962, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2282/2000001 [21:52<284:44:25,  1.95it/s]

buffer size = 4964, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2283/2000001 [21:52<280:37:34,  1.98it/s]

buffer size = 4966, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2284/2000001 [21:53<276:25:16,  2.01it/s]

buffer size = 4968, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2285/2000001 [21:53<275:42:27,  2.01it/s]

buffer size = 4970, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2286/2000001 [21:54<275:59:37,  2.01it/s]

buffer size = 4972, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2287/2000001 [21:54<276:47:16,  2.00it/s]

buffer size = 4974, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2288/2000001 [21:55<276:37:17,  2.01it/s]

buffer size = 4976, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2289/2000001 [21:55<276:10:03,  2.01it/s]

buffer size = 4978, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2290/2000001 [21:56<272:42:45,  2.03it/s]

buffer size = 4980, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2291/2000001 [21:56<270:41:56,  2.05it/s]

buffer size = 4982, epsilon = 0.09886
mean_reward :  0.0


  0%|          | 2292/2000001 [21:57<271:54:33,  2.04it/s]

buffer size = 4984, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2293/2000001 [21:57<269:57:19,  2.06it/s]

buffer size = 4986, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2294/2000001 [21:58<272:22:48,  2.04it/s]

buffer size = 4988, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2295/2000001 [21:58<267:59:30,  2.07it/s]

buffer size = 4990, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2296/2000001 [21:59<319:36:26,  1.74it/s]

buffer size = 4992, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2297/2000001 [22:00<365:30:19,  1.52it/s]

buffer size = 4994, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2298/2000001 [22:00<376:50:27,  1.47it/s]

buffer size = 4996, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2299/2000001 [22:01<343:29:18,  1.62it/s]

buffer size = 4998, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2300/2000001 [22:01<322:08:48,  1.72it/s]

buffer size = 5000, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2301/2000001 [22:02<308:07:28,  1.80it/s]

buffer size = 5002, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2302/2000001 [22:02<300:05:42,  1.85it/s]

buffer size = 5004, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2303/2000001 [22:03<292:46:16,  1.90it/s]

buffer size = 5006, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2304/2000001 [22:03<283:45:52,  1.96it/s]

buffer size = 5008, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2305/2000001 [22:04<281:16:11,  1.97it/s]

buffer size = 5010, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2306/2000001 [22:04<279:21:35,  1.99it/s]

buffer size = 5012, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2307/2000001 [22:05<280:04:46,  1.98it/s]

buffer size = 5014, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2308/2000001 [22:05<275:29:09,  2.01it/s]

buffer size = 5016, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2309/2000001 [22:06<276:49:28,  2.00it/s]

buffer size = 5018, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2310/2000001 [22:06<275:06:53,  2.02it/s]

buffer size = 5020, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2311/2000001 [22:07<275:59:43,  2.01it/s]

buffer size = 5022, epsilon = 0.09885
mean_reward :  0.0


  0%|          | 2312/2000001 [22:07<279:02:09,  1.99it/s]

buffer size = 5024, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2313/2000001 [22:08<278:19:59,  1.99it/s]

buffer size = 5026, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2314/2000001 [22:08<276:02:36,  2.01it/s]

buffer size = 5028, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2315/2000001 [22:09<275:20:40,  2.02it/s]

buffer size = 5030, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2316/2000001 [22:09<277:14:23,  2.00it/s]

buffer size = 5032, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2317/2000001 [22:10<279:06:39,  1.99it/s]

buffer size = 5034, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2318/2000001 [22:10<279:51:51,  1.98it/s]

buffer size = 5036, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2319/2000001 [22:11<328:37:25,  1.69it/s]

buffer size = 5038, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2320/2000001 [22:12<365:38:09,  1.52it/s]

buffer size = 5040, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2321/2000001 [22:13<387:42:05,  1.43it/s]

buffer size = 5042, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2322/2000001 [22:13<351:30:30,  1.58it/s]

buffer size = 5044, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2323/2000001 [22:14<327:26:48,  1.69it/s]

buffer size = 5046, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2324/2000001 [22:14<313:07:36,  1.77it/s]

buffer size = 5048, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2325/2000001 [22:15<300:51:01,  1.84it/s]

buffer size = 5050, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2326/2000001 [22:15<296:35:46,  1.87it/s]

buffer size = 5052, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2327/2000001 [22:16<290:07:56,  1.91it/s]

buffer size = 5054, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2328/2000001 [22:16<286:55:06,  1.93it/s]

buffer size = 5056, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2329/2000001 [22:17<285:40:18,  1.94it/s]

buffer size = 5058, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2330/2000001 [22:17<282:36:11,  1.96it/s]

buffer size = 5060, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2331/2000001 [22:18<276:50:02,  2.00it/s]

buffer size = 5062, epsilon = 0.09884
mean_reward :  0.0


  0%|          | 2332/2000001 [22:18<275:48:56,  2.01it/s]

buffer size = 5064, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2333/2000001 [22:19<273:44:59,  2.03it/s]

buffer size = 5066, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2334/2000001 [22:19<273:38:34,  2.03it/s]

buffer size = 5068, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2335/2000001 [22:20<276:15:29,  2.01it/s]

buffer size = 5070, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2336/2000001 [22:20<277:16:34,  2.00it/s]

buffer size = 5072, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2337/2000001 [22:21<274:41:16,  2.02it/s]

buffer size = 5074, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2338/2000001 [22:21<275:09:27,  2.02it/s]

buffer size = 5076, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2339/2000001 [22:22<273:18:49,  2.03it/s]

buffer size = 5078, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2340/2000001 [22:22<274:25:56,  2.02it/s]

buffer size = 5080, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2341/2000001 [22:23<272:43:25,  2.03it/s]

buffer size = 5082, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2342/2000001 [22:24<322:18:29,  1.72it/s]

buffer size = 5084, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2343/2000001 [22:24<365:40:47,  1.52it/s]

buffer size = 5086, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2344/2000001 [22:25<384:01:37,  1.44it/s]

buffer size = 5088, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2345/2000001 [22:26<352:53:18,  1.57it/s]

buffer size = 5090, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2346/2000001 [22:26<328:48:23,  1.69it/s]

buffer size = 5092, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2347/2000001 [22:27<312:15:54,  1.78it/s]

buffer size = 5094, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2348/2000001 [22:27<299:48:27,  1.85it/s]

buffer size = 5096, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2349/2000001 [22:28<292:56:14,  1.89it/s]

buffer size = 5098, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2350/2000001 [22:28<288:41:46,  1.92it/s]

buffer size = 5100, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2351/2000001 [22:29<285:26:54,  1.94it/s]

buffer size = 5102, epsilon = 0.09883
mean_reward :  0.0


  0%|          | 2352/2000001 [22:29<280:26:13,  1.98it/s]

buffer size = 5104, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2353/2000001 [22:30<279:07:27,  1.99it/s]

buffer size = 5106, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2354/2000001 [22:30<277:39:29,  2.00it/s]

buffer size = 5108, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2355/2000001 [22:31<275:04:29,  2.02it/s]

buffer size = 5110, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2356/2000001 [22:31<272:48:18,  2.03it/s]

buffer size = 5112, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2357/2000001 [22:32<272:41:35,  2.03it/s]

buffer size = 5114, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2358/2000001 [22:32<272:17:12,  2.04it/s]

buffer size = 5116, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2359/2000001 [22:33<273:19:34,  2.03it/s]

buffer size = 5118, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2360/2000001 [22:33<274:04:26,  2.02it/s]

buffer size = 5120, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2361/2000001 [22:34<274:24:21,  2.02it/s]

buffer size = 5122, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2362/2000001 [22:34<273:32:03,  2.03it/s]

buffer size = 5124, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2363/2000001 [22:34<273:41:54,  2.03it/s]

buffer size = 5126, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2364/2000001 [22:35<272:29:53,  2.04it/s]

buffer size = 5128, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2365/2000001 [22:36<319:24:18,  1.74it/s]

buffer size = 5130, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2366/2000001 [22:37<355:40:02,  1.56it/s]

buffer size = 5132, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2367/2000001 [22:37<385:19:20,  1.44it/s]

buffer size = 5134, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2368/2000001 [22:38<354:52:13,  1.56it/s]

buffer size = 5136, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2369/2000001 [22:38<329:00:54,  1.69it/s]

buffer size = 5138, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2370/2000001 [22:39<313:52:51,  1.77it/s]

buffer size = 5140, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2371/2000001 [22:39<301:12:18,  1.84it/s]

buffer size = 5142, epsilon = 0.09882
mean_reward :  0.0


  0%|          | 2372/2000001 [22:40<294:27:51,  1.88it/s]

buffer size = 5144, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2373/2000001 [22:40<287:52:32,  1.93it/s]

buffer size = 5146, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2374/2000001 [22:41<286:46:36,  1.93it/s]

buffer size = 5148, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2375/2000001 [22:41<284:03:22,  1.95it/s]

buffer size = 5150, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2376/2000001 [22:42<283:07:03,  1.96it/s]

buffer size = 5152, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2377/2000001 [22:42<281:17:30,  1.97it/s]

buffer size = 5154, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2378/2000001 [22:43<279:41:12,  1.98it/s]

buffer size = 5156, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2379/2000001 [22:43<278:29:58,  1.99it/s]

buffer size = 5158, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2380/2000001 [22:44<278:20:18,  1.99it/s]

buffer size = 5160, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2381/2000001 [22:44<278:57:00,  1.99it/s]

buffer size = 5162, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2382/2000001 [22:45<278:24:47,  1.99it/s]

buffer size = 5164, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2383/2000001 [22:45<279:43:04,  1.98it/s]

buffer size = 5166, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2384/2000001 [22:46<278:38:10,  1.99it/s]

buffer size = 5168, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2385/2000001 [22:46<277:38:36,  2.00it/s]

buffer size = 5170, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2386/2000001 [22:47<277:07:07,  2.00it/s]

buffer size = 5172, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2387/2000001 [22:47<277:58:36,  2.00it/s]

buffer size = 5174, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2388/2000001 [22:48<325:07:13,  1.71it/s]

buffer size = 5176, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2389/2000001 [22:49<359:22:07,  1.54it/s]

buffer size = 5178, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2390/2000001 [22:50<391:43:21,  1.42it/s]

buffer size = 5180, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2391/2000001 [22:50<364:30:05,  1.52it/s]

buffer size = 5182, epsilon = 0.09881
mean_reward :  0.0


  0%|          | 2392/2000001 [22:51<338:10:37,  1.64it/s]

buffer size = 5184, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2393/2000001 [22:51<323:11:01,  1.72it/s]

buffer size = 5186, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2394/2000001 [22:52<311:51:06,  1.78it/s]

buffer size = 5188, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2395/2000001 [22:52<306:27:29,  1.81it/s]

buffer size = 5190, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2396/2000001 [22:53<298:14:52,  1.86it/s]

buffer size = 5192, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2397/2000001 [22:53<291:19:54,  1.90it/s]

buffer size = 5194, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2398/2000001 [22:54<283:59:39,  1.95it/s]

buffer size = 5196, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2399/2000001 [22:54<283:19:08,  1.96it/s]

buffer size = 5198, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2400/2000001 [22:55<280:49:40,  1.98it/s]

buffer size = 5200, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2401/2000001 [22:55<280:13:26,  1.98it/s]

buffer size = 5202, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2402/2000001 [22:56<278:55:18,  1.99it/s]

buffer size = 5204, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2403/2000001 [22:56<278:22:44,  1.99it/s]

buffer size = 5206, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2404/2000001 [22:57<277:02:20,  2.00it/s]

buffer size = 5208, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2405/2000001 [22:57<278:39:10,  1.99it/s]

buffer size = 5210, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2406/2000001 [22:58<275:56:46,  2.01it/s]

buffer size = 5212, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2407/2000001 [22:58<277:07:01,  2.00it/s]

buffer size = 5214, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2408/2000001 [22:59<276:58:05,  2.00it/s]

buffer size = 5216, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2409/2000001 [22:59<275:53:24,  2.01it/s]

buffer size = 5218, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2410/2000001 [23:00<276:59:25,  2.00it/s]

buffer size = 5220, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2411/2000001 [23:01<332:47:28,  1.67it/s]

buffer size = 5222, epsilon = 0.09880
mean_reward :  0.0


  0%|          | 2412/2000001 [23:02<397:32:32,  1.40it/s]

buffer size = 5224, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2413/2000001 [23:03<439:16:51,  1.26it/s]

buffer size = 5226, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2414/2000001 [23:03<403:48:22,  1.37it/s]

buffer size = 5228, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2415/2000001 [23:04<382:36:23,  1.45it/s]

buffer size = 5230, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2416/2000001 [23:04<364:20:30,  1.52it/s]

buffer size = 5232, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2417/2000001 [23:05<352:28:46,  1.57it/s]

buffer size = 5234, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2418/2000001 [23:06<344:04:47,  1.61it/s]

buffer size = 5236, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2419/2000001 [23:06<346:19:05,  1.60it/s]

buffer size = 5238, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2420/2000001 [23:07<341:05:58,  1.63it/s]

buffer size = 5240, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2421/2000001 [23:07<337:08:32,  1.65it/s]

buffer size = 5242, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2422/2000001 [23:08<341:17:26,  1.63it/s]

buffer size = 5244, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2423/2000001 [23:09<366:43:45,  1.51it/s]

buffer size = 5246, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2424/2000001 [23:10<399:30:30,  1.39it/s]

buffer size = 5248, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2425/2000001 [23:10<418:52:09,  1.32it/s]

buffer size = 5250, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2426/2000001 [23:11<393:14:47,  1.41it/s]

buffer size = 5252, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2427/2000001 [23:12<359:23:28,  1.54it/s]

buffer size = 5254, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2428/2000001 [23:12<332:20:21,  1.67it/s]

buffer size = 5256, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2429/2000001 [23:13<322:56:09,  1.72it/s]

buffer size = 5258, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2430/2000001 [23:13<358:22:11,  1.55it/s]

buffer size = 5260, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2431/2000001 [23:14<385:58:06,  1.44it/s]

buffer size = 5262, epsilon = 0.09879
mean_reward :  0.0


  0%|          | 2432/2000001 [23:15<407:09:14,  1.36it/s]

buffer size = 5264, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2433/2000001 [23:16<373:01:35,  1.49it/s]

buffer size = 5266, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2434/2000001 [23:16<341:52:44,  1.62it/s]

buffer size = 5268, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2435/2000001 [23:17<321:33:26,  1.73it/s]

buffer size = 5270, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2436/2000001 [23:17<310:10:37,  1.79it/s]

buffer size = 5272, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2437/2000001 [23:18<299:30:08,  1.85it/s]

buffer size = 5274, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2438/2000001 [23:18<290:19:52,  1.91it/s]

buffer size = 5276, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2439/2000001 [23:19<286:12:19,  1.94it/s]

buffer size = 5278, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2440/2000001 [23:19<282:24:26,  1.96it/s]

buffer size = 5280, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2441/2000001 [23:20<284:49:50,  1.95it/s]

buffer size = 5282, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2442/2000001 [23:20<278:52:39,  1.99it/s]

buffer size = 5284, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2443/2000001 [23:21<275:42:06,  2.01it/s]

buffer size = 5286, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2444/2000001 [23:21<273:41:56,  2.03it/s]

buffer size = 5288, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2445/2000001 [23:22<273:56:11,  2.03it/s]

buffer size = 5290, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2446/2000001 [23:22<274:25:41,  2.02it/s]

buffer size = 5292, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2447/2000001 [23:22<271:36:16,  2.04it/s]

buffer size = 5294, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2448/2000001 [23:23<273:29:16,  2.03it/s]

buffer size = 5296, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2449/2000001 [23:23<270:21:56,  2.05it/s]

buffer size = 5298, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2450/2000001 [23:24<273:34:19,  2.03it/s]

buffer size = 5300, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2451/2000001 [23:24<274:13:25,  2.02it/s]

buffer size = 5302, epsilon = 0.09878
mean_reward :  0.0


  0%|          | 2452/2000001 [23:25<277:31:26,  2.00it/s]

buffer size = 5304, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2453/2000001 [23:26<316:27:10,  1.75it/s]

buffer size = 5306, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2454/2000001 [23:27<352:15:50,  1.58it/s]

buffer size = 5308, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2455/2000001 [23:27<382:18:06,  1.45it/s]

buffer size = 5310, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2456/2000001 [23:28<362:47:03,  1.53it/s]

buffer size = 5312, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2457/2000001 [23:28<335:15:47,  1.66it/s]

buffer size = 5314, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2458/2000001 [23:29<320:27:08,  1.73it/s]

buffer size = 5316, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2459/2000001 [23:29<306:12:47,  1.81it/s]

buffer size = 5318, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2460/2000001 [23:30<298:09:53,  1.86it/s]

buffer size = 5320, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2461/2000001 [23:30<290:54:12,  1.91it/s]

buffer size = 5322, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2462/2000001 [23:31<285:58:28,  1.94it/s]

buffer size = 5324, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2463/2000001 [23:31<280:41:25,  1.98it/s]

buffer size = 5326, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2464/2000001 [23:32<278:42:48,  1.99it/s]

buffer size = 5328, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2465/2000001 [23:32<280:14:43,  1.98it/s]

buffer size = 5330, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2466/2000001 [23:33<277:01:03,  2.00it/s]

buffer size = 5332, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2467/2000001 [23:33<276:52:31,  2.00it/s]

buffer size = 5334, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2468/2000001 [23:34<276:52:40,  2.00it/s]

buffer size = 5336, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2469/2000001 [23:34<277:43:41,  2.00it/s]

buffer size = 5338, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2470/2000001 [23:35<277:57:32,  2.00it/s]

buffer size = 5340, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2471/2000001 [23:35<276:45:37,  2.00it/s]

buffer size = 5342, epsilon = 0.09877
mean_reward :  0.0


  0%|          | 2472/2000001 [23:36<274:35:57,  2.02it/s]

buffer size = 5344, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2473/2000001 [23:36<276:28:15,  2.01it/s]

buffer size = 5346, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2474/2000001 [23:37<277:15:33,  2.00it/s]

buffer size = 5348, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2475/2000001 [23:37<275:43:38,  2.01it/s]

buffer size = 5350, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2476/2000001 [23:38<299:58:19,  1.85it/s]

buffer size = 5352, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2477/2000001 [23:39<351:33:35,  1.58it/s]

buffer size = 5354, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2478/2000001 [23:40<383:50:51,  1.45it/s]

buffer size = 5356, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2479/2000001 [23:40<359:39:00,  1.54it/s]

buffer size = 5358, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2480/2000001 [23:41<336:11:46,  1.65it/s]

buffer size = 5360, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2481/2000001 [23:41<316:49:42,  1.75it/s]

buffer size = 5362, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2482/2000001 [23:42<303:54:23,  1.83it/s]

buffer size = 5364, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2483/2000001 [23:42<294:02:10,  1.89it/s]

buffer size = 5366, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2484/2000001 [23:43<289:56:48,  1.91it/s]

buffer size = 5368, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2485/2000001 [23:43<286:58:54,  1.93it/s]

buffer size = 5370, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2486/2000001 [23:44<284:06:38,  1.95it/s]

buffer size = 5372, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2487/2000001 [23:44<278:42:18,  1.99it/s]

buffer size = 5374, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2488/2000001 [23:45<279:11:15,  1.99it/s]

buffer size = 5376, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2489/2000001 [23:45<283:08:58,  1.96it/s]

buffer size = 5378, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2490/2000001 [23:46<289:56:02,  1.91it/s]

buffer size = 5380, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2491/2000001 [23:46<295:17:14,  1.88it/s]

buffer size = 5382, epsilon = 0.09876
mean_reward :  0.0


  0%|          | 2492/2000001 [23:47<298:24:00,  1.86it/s]

buffer size = 5384, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2493/2000001 [23:47<298:21:55,  1.86it/s]

buffer size = 5386, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2494/2000001 [23:48<294:56:23,  1.88it/s]

buffer size = 5388, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2495/2000001 [23:48<285:49:02,  1.94it/s]

buffer size = 5390, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2496/2000001 [23:49<283:15:44,  1.96it/s]

buffer size = 5392, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2497/2000001 [23:49<281:17:01,  1.97it/s]

buffer size = 5394, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2498/2000001 [23:50<283:35:02,  1.96it/s]

buffer size = 5396, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2499/2000001 [23:51<326:49:56,  1.70it/s]

buffer size = 5398, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2500/2000001 [23:51<361:32:41,  1.53it/s]

buffer size = 5400, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2501/2000001 [23:52<388:25:35,  1.43it/s]

buffer size = 5402, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2502/2000001 [23:53<355:50:18,  1.56it/s]

buffer size = 5404, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2503/2000001 [23:53<331:15:05,  1.68it/s]

buffer size = 5406, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2504/2000001 [23:54<313:30:00,  1.77it/s]

buffer size = 5408, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2505/2000001 [23:54<305:13:38,  1.82it/s]

buffer size = 5410, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2506/2000001 [23:55<297:15:21,  1.87it/s]

buffer size = 5412, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2507/2000001 [23:55<292:07:11,  1.90it/s]

buffer size = 5414, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2508/2000001 [23:56<286:28:05,  1.94it/s]

buffer size = 5416, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2509/2000001 [23:56<283:35:35,  1.96it/s]

buffer size = 5418, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2510/2000001 [23:57<280:09:03,  1.98it/s]

buffer size = 5420, epsilon = 0.09875
mean_reward :  0.0


  0%|          | 2511/2000001 [23:57<280:32:33,  1.98it/s]

buffer size = 5422, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2512/2000001 [23:58<278:57:54,  1.99it/s]

buffer size = 5424, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2513/2000001 [23:58<278:28:17,  1.99it/s]

buffer size = 5426, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2514/2000001 [23:59<281:39:23,  1.97it/s]

buffer size = 5428, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2515/2000001 [23:59<280:07:36,  1.98it/s]

buffer size = 5430, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2516/2000001 [24:00<277:20:13,  2.00it/s]

buffer size = 5432, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2517/2000001 [24:00<279:59:42,  1.98it/s]

buffer size = 5434, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2518/2000001 [24:01<279:58:36,  1.98it/s]

buffer size = 5436, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2519/2000001 [24:01<277:45:59,  2.00it/s]

buffer size = 5438, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2520/2000001 [24:02<275:47:59,  2.01it/s]

buffer size = 5440, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2521/2000001 [24:02<276:52:17,  2.00it/s]

buffer size = 5442, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2522/2000001 [24:03<321:12:30,  1.73it/s]

buffer size = 5444, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2523/2000001 [24:04<364:02:54,  1.52it/s]

buffer size = 5446, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2524/2000001 [24:05<392:39:46,  1.41it/s]

buffer size = 5448, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2525/2000001 [24:05<357:00:38,  1.55it/s]

buffer size = 5450, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2526/2000001 [24:06<333:37:14,  1.66it/s]

buffer size = 5452, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2527/2000001 [24:06<314:57:33,  1.76it/s]

buffer size = 5454, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2528/2000001 [24:07<307:28:26,  1.80it/s]

buffer size = 5456, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2529/2000001 [24:07<297:31:27,  1.86it/s]

buffer size = 5458, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2530/2000001 [24:08<291:41:40,  1.90it/s]

buffer size = 5460, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2531/2000001 [24:08<286:44:32,  1.94it/s]

buffer size = 5462, epsilon = 0.09874
mean_reward :  0.0


  0%|          | 2532/2000001 [24:09<285:16:28,  1.94it/s]

buffer size = 5464, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2533/2000001 [24:09<280:51:58,  1.98it/s]

buffer size = 5466, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2534/2000001 [24:10<283:35:45,  1.96it/s]

buffer size = 5468, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2535/2000001 [24:10<280:12:18,  1.98it/s]

buffer size = 5470, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2536/2000001 [24:11<281:23:16,  1.97it/s]

buffer size = 5472, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2537/2000001 [24:11<281:25:32,  1.97it/s]

buffer size = 5474, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2538/2000001 [24:12<278:35:58,  1.99it/s]

buffer size = 5476, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2539/2000001 [24:12<276:32:49,  2.01it/s]

buffer size = 5478, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2540/2000001 [24:13<278:43:42,  1.99it/s]

buffer size = 5480, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2541/2000001 [24:13<274:08:52,  2.02it/s]

buffer size = 5482, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2542/2000001 [24:14<277:11:35,  2.00it/s]

buffer size = 5484, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2543/2000001 [24:14<274:09:05,  2.02it/s]

buffer size = 5486, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2544/2000001 [24:15<280:14:07,  1.98it/s]

buffer size = 5488, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2545/2000001 [24:16<325:28:44,  1.70it/s]

buffer size = 5490, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2546/2000001 [24:16<366:41:46,  1.51it/s]

buffer size = 5492, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2547/2000001 [24:17<387:09:33,  1.43it/s]

buffer size = 5494, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2548/2000001 [24:18<353:59:36,  1.57it/s]

buffer size = 5496, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2549/2000001 [24:18<330:23:48,  1.68it/s]

buffer size = 5498, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2550/2000001 [24:19<314:29:52,  1.76it/s]

buffer size = 5500, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2551/2000001 [24:19<305:22:10,  1.82it/s]

buffer size = 5502, epsilon = 0.09873
mean_reward :  0.0


  0%|          | 2552/2000001 [24:20<298:25:49,  1.86it/s]

buffer size = 5504, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2553/2000001 [24:20<293:59:07,  1.89it/s]

buffer size = 5506, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2554/2000001 [24:21<287:48:33,  1.93it/s]

buffer size = 5508, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2555/2000001 [24:21<286:05:53,  1.94it/s]

buffer size = 5510, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2556/2000001 [24:22<284:50:01,  1.95it/s]

buffer size = 5512, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2557/2000001 [24:22<284:26:54,  1.95it/s]

buffer size = 5514, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2558/2000001 [24:23<282:12:07,  1.97it/s]

buffer size = 5516, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2559/2000001 [24:23<279:56:37,  1.98it/s]

buffer size = 5518, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2560/2000001 [24:24<277:51:45,  2.00it/s]

buffer size = 5520, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2561/2000001 [24:24<279:44:32,  1.98it/s]

buffer size = 5522, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2562/2000001 [24:25<277:18:12,  2.00it/s]

buffer size = 5524, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2563/2000001 [24:25<277:43:29,  2.00it/s]

buffer size = 5526, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2564/2000001 [24:26<282:23:28,  1.96it/s]

buffer size = 5528, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2565/2000001 [24:26<284:14:51,  1.95it/s]

buffer size = 5530, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2566/2000001 [24:27<279:53:11,  1.98it/s]

buffer size = 5532, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2567/2000001 [24:27<296:12:27,  1.87it/s]

buffer size = 5534, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2568/2000001 [24:28<350:47:35,  1.58it/s]

buffer size = 5536, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2569/2000001 [24:29<381:40:31,  1.45it/s]

buffer size = 5538, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2570/2000001 [24:30<378:18:26,  1.47it/s]

buffer size = 5540, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2571/2000001 [24:30<349:45:29,  1.59it/s]

buffer size = 5542, epsilon = 0.09872
mean_reward :  0.0


  0%|          | 2572/2000001 [24:31<330:41:24,  1.68it/s]

buffer size = 5544, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2573/2000001 [24:31<314:54:43,  1.76it/s]

buffer size = 5546, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2574/2000001 [24:32<305:42:13,  1.81it/s]

buffer size = 5548, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2575/2000001 [24:32<298:24:40,  1.86it/s]

buffer size = 5550, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2576/2000001 [24:33<295:32:12,  1.88it/s]

buffer size = 5552, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2577/2000001 [24:33<292:18:15,  1.90it/s]

buffer size = 5554, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2578/2000001 [24:34<290:40:40,  1.91it/s]

buffer size = 5556, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2579/2000001 [24:34<286:12:06,  1.94it/s]

buffer size = 5558, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2580/2000001 [24:35<288:11:39,  1.93it/s]

buffer size = 5560, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2581/2000001 [24:35<284:03:24,  1.95it/s]

buffer size = 5562, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2582/2000001 [24:36<283:17:30,  1.96it/s]

buffer size = 5564, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2583/2000001 [24:36<282:48:59,  1.96it/s]

buffer size = 5566, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2584/2000001 [24:37<285:37:30,  1.94it/s]

buffer size = 5568, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2585/2000001 [24:37<284:30:45,  1.95it/s]

buffer size = 5570, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2586/2000001 [24:38<284:19:23,  1.95it/s]

buffer size = 5572, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2587/2000001 [24:38<282:56:54,  1.96it/s]

buffer size = 5574, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2588/2000001 [24:39<286:08:37,  1.94it/s]

buffer size = 5576, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2589/2000001 [24:39<282:10:54,  1.97it/s]

buffer size = 5578, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2590/2000001 [24:40<328:57:12,  1.69it/s]

buffer size = 5580, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2591/2000001 [24:41<362:28:08,  1.53it/s]

buffer size = 5582, epsilon = 0.09871
mean_reward :  0.0


  0%|          | 2592/2000001 [24:42<387:57:01,  1.43it/s]

buffer size = 5584, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2593/2000001 [24:42<367:24:15,  1.51it/s]

buffer size = 5586, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2594/2000001 [24:43<339:38:55,  1.63it/s]

buffer size = 5588, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2595/2000001 [24:43<322:41:52,  1.72it/s]

buffer size = 5590, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2596/2000001 [24:44<310:24:33,  1.79it/s]

buffer size = 5592, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2597/2000001 [24:44<307:29:18,  1.80it/s]

buffer size = 5594, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2598/2000001 [24:45<298:50:12,  1.86it/s]

buffer size = 5596, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2599/2000001 [24:45<299:58:14,  1.85it/s]

buffer size = 5598, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2600/2000001 [24:46<294:21:12,  1.88it/s]

buffer size = 5600, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2601/2000001 [24:46<292:34:29,  1.90it/s]

buffer size = 5602, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2602/2000001 [24:47<289:44:27,  1.91it/s]

buffer size = 5604, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2603/2000001 [24:48<291:02:05,  1.91it/s]

buffer size = 5606, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2604/2000001 [24:48<289:46:22,  1.91it/s]

buffer size = 5608, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2605/2000001 [24:49<288:27:50,  1.92it/s]

buffer size = 5610, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2606/2000001 [24:49<287:51:05,  1.93it/s]

buffer size = 5612, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2607/2000001 [24:50<291:15:24,  1.90it/s]

buffer size = 5614, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2608/2000001 [24:50<289:14:19,  1.92it/s]

buffer size = 5616, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2609/2000001 [24:51<289:43:06,  1.92it/s]

buffer size = 5618, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2610/2000001 [24:51<288:48:19,  1.92it/s]

buffer size = 5620, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2611/2000001 [24:52<294:42:28,  1.88it/s]

buffer size = 5622, epsilon = 0.09870
mean_reward :  0.0


  0%|          | 2612/2000001 [24:52<322:34:59,  1.72it/s]

buffer size = 5624, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2613/2000001 [24:53<354:13:50,  1.57it/s]

buffer size = 5626, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2614/2000001 [24:54<381:31:06,  1.45it/s]

buffer size = 5628, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2615/2000001 [24:55<389:09:08,  1.43it/s]

buffer size = 5630, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2616/2000001 [24:55<355:33:51,  1.56it/s]

buffer size = 5632, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2617/2000001 [24:56<334:05:10,  1.66it/s]

buffer size = 5634, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2618/2000001 [24:56<317:38:14,  1.75it/s]

buffer size = 5636, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2619/2000001 [24:57<306:31:39,  1.81it/s]

buffer size = 5638, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2620/2000001 [24:57<300:38:33,  1.85it/s]

buffer size = 5640, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2621/2000001 [24:58<298:19:57,  1.86it/s]

buffer size = 5642, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2622/2000001 [24:58<290:42:26,  1.91it/s]

buffer size = 5644, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2623/2000001 [24:59<292:28:11,  1.90it/s]

buffer size = 5646, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2624/2000001 [24:59<288:18:16,  1.92it/s]

buffer size = 5648, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2625/2000001 [25:00<288:32:22,  1.92it/s]

buffer size = 5650, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2626/2000001 [25:00<284:53:27,  1.95it/s]

buffer size = 5652, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2627/2000001 [25:01<284:02:00,  1.95it/s]

buffer size = 5654, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2628/2000001 [25:01<280:00:24,  1.98it/s]

buffer size = 5656, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2629/2000001 [25:02<280:31:53,  1.98it/s]

buffer size = 5658, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2630/2000001 [25:02<281:12:07,  1.97it/s]

buffer size = 5660, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2631/2000001 [25:03<283:17:57,  1.96it/s]

buffer size = 5662, epsilon = 0.09869
mean_reward :  0.0


  0%|          | 2632/2000001 [25:03<279:36:46,  1.98it/s]

buffer size = 5664, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2633/2000001 [25:04<279:33:37,  1.98it/s]

buffer size = 5666, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2634/2000001 [25:04<281:50:30,  1.97it/s]

buffer size = 5668, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2635/2000001 [25:05<319:20:19,  1.74it/s]

buffer size = 5670, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2636/2000001 [25:06<359:27:38,  1.54it/s]

buffer size = 5672, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2637/2000001 [25:07<385:09:51,  1.44it/s]

buffer size = 5674, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2638/2000001 [25:07<372:55:28,  1.49it/s]

buffer size = 5676, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2639/2000001 [25:08<343:02:59,  1.62it/s]

buffer size = 5678, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2640/2000001 [25:08<325:39:10,  1.70it/s]

buffer size = 5680, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2641/2000001 [25:09<311:35:36,  1.78it/s]

buffer size = 5682, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2642/2000001 [25:09<303:03:26,  1.83it/s]

buffer size = 5684, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2643/2000001 [25:10<294:26:18,  1.88it/s]

buffer size = 5686, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2644/2000001 [25:10<290:30:54,  1.91it/s]

buffer size = 5688, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2645/2000001 [25:11<285:59:31,  1.94it/s]

buffer size = 5690, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2646/2000001 [25:11<283:57:52,  1.95it/s]

buffer size = 5692, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2647/2000001 [25:12<282:47:12,  1.96it/s]

buffer size = 5694, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2648/2000001 [25:12<281:35:53,  1.97it/s]

buffer size = 5696, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2649/2000001 [25:13<278:44:47,  1.99it/s]

buffer size = 5698, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2650/2000001 [25:13<279:14:03,  1.99it/s]

buffer size = 5700, epsilon = 0.09868
mean_reward :  0.0


  0%|          | 2651/2000001 [25:14<280:01:58,  1.98it/s]

buffer size = 5702, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2652/2000001 [25:14<280:55:36,  1.97it/s]

buffer size = 5704, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2653/2000001 [25:15<278:50:52,  1.99it/s]

buffer size = 5706, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2654/2000001 [25:15<280:18:53,  1.98it/s]

buffer size = 5708, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2655/2000001 [25:16<279:05:42,  1.99it/s]

buffer size = 5710, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2656/2000001 [25:16<280:44:22,  1.98it/s]

buffer size = 5712, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2657/2000001 [25:17<277:33:09,  2.00it/s]

buffer size = 5714, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2658/2000001 [25:18<322:57:22,  1.72it/s]

buffer size = 5716, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2659/2000001 [25:19<369:15:50,  1.50it/s]

buffer size = 5718, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2660/2000001 [25:19<396:04:20,  1.40it/s]

buffer size = 5720, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2661/2000001 [25:20<363:31:12,  1.53it/s]

buffer size = 5722, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2662/2000001 [25:20<335:51:16,  1.65it/s]

buffer size = 5724, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2663/2000001 [25:21<319:12:57,  1.74it/s]

buffer size = 5726, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2664/2000001 [25:21<305:17:29,  1.82it/s]

buffer size = 5728, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2665/2000001 [25:22<298:34:06,  1.86it/s]

buffer size = 5730, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2666/2000001 [25:22<292:45:14,  1.90it/s]

buffer size = 5732, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2667/2000001 [25:23<288:54:39,  1.92it/s]

buffer size = 5734, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2668/2000001 [25:23<285:21:05,  1.94it/s]

buffer size = 5736, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2669/2000001 [25:24<284:43:02,  1.95it/s]

buffer size = 5738, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2670/2000001 [25:24<282:30:39,  1.96it/s]

buffer size = 5740, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2671/2000001 [25:25<283:02:32,  1.96it/s]

buffer size = 5742, epsilon = 0.09867
mean_reward :  0.0


  0%|          | 2672/2000001 [25:25<279:47:10,  1.98it/s]

buffer size = 5744, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2673/2000001 [25:26<280:22:53,  1.98it/s]

buffer size = 5746, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2674/2000001 [25:26<278:49:04,  1.99it/s]

buffer size = 5748, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2675/2000001 [25:27<280:02:07,  1.98it/s]

buffer size = 5750, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2676/2000001 [25:27<279:15:45,  1.99it/s]

buffer size = 5752, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2677/2000001 [25:28<280:35:11,  1.98it/s]

buffer size = 5754, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2678/2000001 [25:28<280:49:29,  1.98it/s]

buffer size = 5756, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2679/2000001 [25:29<281:57:38,  1.97it/s]

buffer size = 5758, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2680/2000001 [25:29<281:52:27,  1.97it/s]

buffer size = 5760, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2681/2000001 [25:30<332:04:06,  1.67it/s]

buffer size = 5762, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2682/2000001 [25:31<366:35:43,  1.51it/s]

buffer size = 5764, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2683/2000001 [25:32<388:52:01,  1.43it/s]

buffer size = 5766, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2684/2000001 [25:32<359:08:07,  1.54it/s]

buffer size = 5768, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2685/2000001 [25:33<332:50:50,  1.67it/s]

buffer size = 5770, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2686/2000001 [25:33<317:06:20,  1.75it/s]

buffer size = 5772, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2687/2000001 [25:34<304:38:08,  1.82it/s]

buffer size = 5774, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2688/2000001 [25:34<301:37:12,  1.84it/s]

buffer size = 5776, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2689/2000001 [25:35<295:03:16,  1.88it/s]

buffer size = 5778, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2690/2000001 [25:35<292:25:43,  1.90it/s]

buffer size = 5780, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2691/2000001 [25:36<286:30:36,  1.94it/s]

buffer size = 5782, epsilon = 0.09866
mean_reward :  0.0


  0%|          | 2692/2000001 [25:36<289:43:32,  1.91it/s]

buffer size = 5784, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2693/2000001 [25:37<284:15:50,  1.95it/s]

buffer size = 5786, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2694/2000001 [25:37<284:58:29,  1.95it/s]

buffer size = 5788, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2695/2000001 [25:38<281:05:07,  1.97it/s]

buffer size = 5790, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2696/2000001 [25:38<284:11:53,  1.95it/s]

buffer size = 5792, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2697/2000001 [25:39<284:44:02,  1.95it/s]

buffer size = 5794, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2698/2000001 [25:39<287:17:27,  1.93it/s]

buffer size = 5796, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2699/2000001 [25:40<285:33:26,  1.94it/s]

buffer size = 5798, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2700/2000001 [25:41<285:01:06,  1.95it/s]

buffer size = 5800, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2701/2000001 [25:41<282:12:04,  1.97it/s]

buffer size = 5802, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2702/2000001 [25:42<281:43:21,  1.97it/s]

buffer size = 5804, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2703/2000001 [25:42<302:59:09,  1.83it/s]

buffer size = 5806, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2704/2000001 [25:43<350:36:24,  1.58it/s]

buffer size = 5808, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2705/2000001 [25:44<377:18:25,  1.47it/s]

buffer size = 5810, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2706/2000001 [25:44<374:39:07,  1.48it/s]

buffer size = 5812, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2707/2000001 [25:45<346:10:19,  1.60it/s]

buffer size = 5814, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2708/2000001 [25:45<329:34:53,  1.68it/s]

buffer size = 5816, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2709/2000001 [25:46<313:29:32,  1.77it/s]

buffer size = 5818, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2710/2000001 [25:46<303:20:35,  1.83it/s]

buffer size = 5820, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2711/2000001 [25:47<298:27:17,  1.86it/s]

buffer size = 5822, epsilon = 0.09865
mean_reward :  0.0


  0%|          | 2712/2000001 [25:47<291:12:42,  1.91it/s]

buffer size = 5824, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2713/2000001 [25:48<291:14:05,  1.90it/s]

buffer size = 5826, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2714/2000001 [25:49<287:50:18,  1.93it/s]

buffer size = 5828, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2715/2000001 [25:49<285:08:01,  1.95it/s]

buffer size = 5830, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2716/2000001 [25:50<289:37:04,  1.92it/s]

buffer size = 5832, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2717/2000001 [25:50<288:12:38,  1.92it/s]

buffer size = 5834, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2718/2000001 [25:51<284:11:56,  1.95it/s]

buffer size = 5836, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2719/2000001 [25:51<286:17:57,  1.94it/s]

buffer size = 5838, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2720/2000001 [25:52<284:28:29,  1.95it/s]

buffer size = 5840, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2721/2000001 [25:52<285:38:39,  1.94it/s]

buffer size = 5842, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2722/2000001 [25:53<283:01:28,  1.96it/s]

buffer size = 5844, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2723/2000001 [25:53<282:58:56,  1.96it/s]

buffer size = 5846, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2724/2000001 [25:54<282:09:50,  1.97it/s]

buffer size = 5848, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2725/2000001 [25:54<282:25:49,  1.96it/s]

buffer size = 5850, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2726/2000001 [25:55<331:54:19,  1.67it/s]

buffer size = 5852, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2727/2000001 [25:56<360:12:31,  1.54it/s]

buffer size = 5854, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2728/2000001 [25:57<385:03:29,  1.44it/s]

buffer size = 5856, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2729/2000001 [25:57<365:49:57,  1.52it/s]

buffer size = 5858, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2730/2000001 [25:58<341:24:29,  1.63it/s]

buffer size = 5860, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2731/2000001 [25:58<322:52:34,  1.72it/s]

buffer size = 5862, epsilon = 0.09864
mean_reward :  0.0


  0%|          | 2732/2000001 [25:59<309:31:29,  1.79it/s]

buffer size = 5864, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2733/2000001 [25:59<300:47:10,  1.84it/s]

buffer size = 5866, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2734/2000001 [26:00<295:11:55,  1.88it/s]

buffer size = 5868, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2735/2000001 [26:00<291:26:34,  1.90it/s]

buffer size = 5870, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2736/2000001 [26:01<287:54:44,  1.93it/s]

buffer size = 5872, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2737/2000001 [26:01<286:35:16,  1.94it/s]

buffer size = 5874, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2738/2000001 [26:02<285:43:39,  1.94it/s]

buffer size = 5876, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2739/2000001 [26:02<288:07:03,  1.93it/s]

buffer size = 5878, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2740/2000001 [26:03<285:07:38,  1.95it/s]

buffer size = 5880, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2741/2000001 [26:03<283:59:50,  1.95it/s]

buffer size = 5882, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2742/2000001 [26:04<282:00:15,  1.97it/s]

buffer size = 5884, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2743/2000001 [26:04<282:41:17,  1.96it/s]

buffer size = 5886, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2744/2000001 [26:05<284:59:43,  1.95it/s]

buffer size = 5888, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2745/2000001 [26:05<284:19:02,  1.95it/s]

buffer size = 5890, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2746/2000001 [26:06<282:13:33,  1.97it/s]

buffer size = 5892, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2747/2000001 [26:06<281:06:40,  1.97it/s]

buffer size = 5894, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2748/2000001 [26:07<282:37:51,  1.96it/s]

buffer size = 5896, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2749/2000001 [26:08<332:38:00,  1.67it/s]

buffer size = 5898, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2750/2000001 [26:08<365:29:49,  1.52it/s]

buffer size = 5900, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2751/2000001 [26:09<392:02:38,  1.42it/s]

buffer size = 5902, epsilon = 0.09863
mean_reward :  0.0


  0%|          | 2752/2000001 [26:10<366:40:22,  1.51it/s]

buffer size = 5904, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2753/2000001 [26:10<339:14:41,  1.64it/s]

buffer size = 5906, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2754/2000001 [26:11<324:25:31,  1.71it/s]

buffer size = 5908, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2755/2000001 [26:11<312:27:47,  1.78it/s]

buffer size = 5910, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2756/2000001 [26:12<304:47:02,  1.82it/s]

buffer size = 5912, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2757/2000001 [26:12<298:41:55,  1.86it/s]

buffer size = 5914, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2758/2000001 [26:13<298:20:00,  1.86it/s]

buffer size = 5916, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2759/2000001 [26:13<291:24:13,  1.90it/s]

buffer size = 5918, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2760/2000001 [26:14<292:19:25,  1.90it/s]

buffer size = 5920, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2761/2000001 [26:14<288:22:18,  1.92it/s]

buffer size = 5922, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2762/2000001 [26:15<289:10:41,  1.92it/s]

buffer size = 5924, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2763/2000001 [26:15<286:08:35,  1.94it/s]

buffer size = 5926, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2764/2000001 [26:16<283:51:05,  1.95it/s]

buffer size = 5928, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2765/2000001 [26:16<280:06:00,  1.98it/s]

buffer size = 5930, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2766/2000001 [26:17<280:27:42,  1.98it/s]

buffer size = 5932, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2767/2000001 [26:17<277:56:00,  2.00it/s]

buffer size = 5934, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2768/2000001 [26:18<283:15:12,  1.96it/s]

buffer size = 5936, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2769/2000001 [26:18<279:09:21,  1.99it/s]

buffer size = 5938, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2770/2000001 [26:19<280:55:04,  1.97it/s]

buffer size = 5940, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2771/2000001 [26:20<296:45:38,  1.87it/s]

buffer size = 5942, epsilon = 0.09862
mean_reward :  0.0


  0%|          | 2772/2000001 [26:20<348:33:26,  1.59it/s]

buffer size = 5944, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2773/2000001 [26:21<377:18:54,  1.47it/s]

buffer size = 5946, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2774/2000001 [26:22<386:22:38,  1.44it/s]

buffer size = 5948, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2775/2000001 [26:22<356:12:44,  1.56it/s]

buffer size = 5950, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2776/2000001 [26:23<336:22:45,  1.65it/s]

buffer size = 5952, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2777/2000001 [26:23<318:15:15,  1.74it/s]

buffer size = 5954, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2778/2000001 [26:24<311:17:55,  1.78it/s]

buffer size = 5956, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2779/2000001 [26:24<303:32:13,  1.83it/s]

buffer size = 5958, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2780/2000001 [26:25<299:26:56,  1.85it/s]

buffer size = 5960, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2781/2000001 [26:26<292:16:31,  1.90it/s]

buffer size = 5962, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2782/2000001 [26:26<293:53:51,  1.89it/s]

buffer size = 5964, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2783/2000001 [26:27<288:47:11,  1.92it/s]

buffer size = 5966, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2784/2000001 [26:27<290:09:19,  1.91it/s]

buffer size = 5968, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2785/2000001 [26:28<287:23:45,  1.93it/s]

buffer size = 5970, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2786/2000001 [26:28<285:41:46,  1.94it/s]

buffer size = 5972, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2787/2000001 [26:29<283:02:54,  1.96it/s]

buffer size = 5974, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2788/2000001 [26:29<283:07:27,  1.96it/s]

buffer size = 5976, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2789/2000001 [26:30<280:30:16,  1.98it/s]

buffer size = 5978, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2790/2000001 [26:30<281:29:28,  1.97it/s]

buffer size = 5980, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2791/2000001 [26:31<281:19:13,  1.97it/s]

buffer size = 5982, epsilon = 0.09861
mean_reward :  0.0


  0%|          | 2792/2000001 [26:31<286:53:37,  1.93it/s]

buffer size = 5984, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2793/2000001 [26:32<285:00:25,  1.95it/s]

buffer size = 5986, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2794/2000001 [26:32<329:33:46,  1.68it/s]

buffer size = 5988, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2795/2000001 [26:33<364:32:28,  1.52it/s]

buffer size = 5990, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2796/2000001 [26:34<388:36:08,  1.43it/s]

buffer size = 5992, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2797/2000001 [26:35<378:02:24,  1.47it/s]

buffer size = 5994, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2798/2000001 [26:35<347:26:53,  1.60it/s]

buffer size = 5996, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2799/2000001 [26:36<330:41:34,  1.68it/s]

buffer size = 5998, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2800/2000001 [26:36<317:28:17,  1.75it/s]

buffer size = 6000, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2801/2000001 [26:37<311:54:30,  1.78it/s]

buffer size = 6002, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2802/2000001 [26:37<305:09:27,  1.82it/s]

buffer size = 6004, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2803/2000001 [26:38<298:56:32,  1.86it/s]

buffer size = 6006, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2804/2000001 [26:38<293:57:04,  1.89it/s]

buffer size = 6008, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2805/2000001 [26:39<294:29:24,  1.88it/s]

buffer size = 6010, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2806/2000001 [26:39<292:34:17,  1.90it/s]

buffer size = 6012, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2807/2000001 [26:40<290:09:34,  1.91it/s]

buffer size = 6014, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2808/2000001 [26:40<289:51:32,  1.91it/s]

buffer size = 6016, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2809/2000001 [26:41<297:22:28,  1.87it/s]

buffer size = 6018, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2810/2000001 [26:41<292:40:46,  1.90it/s]

buffer size = 6020, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2811/2000001 [26:42<292:55:46,  1.89it/s]

buffer size = 6022, epsilon = 0.09860
mean_reward :  0.0


  0%|          | 2812/2000001 [26:43<290:50:35,  1.91it/s]

buffer size = 6024, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2813/2000001 [26:43<289:54:01,  1.91it/s]

buffer size = 6026, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2814/2000001 [26:44<287:30:29,  1.93it/s]

buffer size = 6028, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2815/2000001 [26:44<287:42:25,  1.93it/s]

buffer size = 6030, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2816/2000001 [26:45<323:52:42,  1.71it/s]

buffer size = 6032, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2817/2000001 [26:46<369:47:31,  1.50it/s]

buffer size = 6034, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2818/2000001 [26:46<396:23:24,  1.40it/s]

buffer size = 6036, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2819/2000001 [26:47<375:35:52,  1.48it/s]

buffer size = 6038, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2820/2000001 [26:48<348:53:57,  1.59it/s]

buffer size = 6040, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2821/2000001 [26:48<336:11:02,  1.65it/s]

buffer size = 6042, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2822/2000001 [26:49<318:55:12,  1.74it/s]

buffer size = 6044, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2823/2000001 [26:49<312:23:41,  1.78it/s]

buffer size = 6046, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2824/2000001 [26:50<304:32:49,  1.82it/s]

buffer size = 6048, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2825/2000001 [26:50<301:24:31,  1.84it/s]

buffer size = 6050, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2826/2000001 [26:51<296:47:52,  1.87it/s]

buffer size = 6052, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2827/2000001 [26:51<297:14:00,  1.87it/s]

buffer size = 6054, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2828/2000001 [26:52<292:31:58,  1.90it/s]

buffer size = 6056, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2829/2000001 [26:52<292:25:40,  1.90it/s]

buffer size = 6058, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2830/2000001 [26:53<289:30:25,  1.92it/s]

buffer size = 6060, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2831/2000001 [26:53<291:47:16,  1.90it/s]

buffer size = 6062, epsilon = 0.09859
mean_reward :  0.0


  0%|          | 2832/2000001 [26:54<290:02:11,  1.91it/s]

buffer size = 6064, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2833/2000001 [26:54<296:14:31,  1.87it/s]

buffer size = 6066, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2834/2000001 [26:55<291:27:23,  1.90it/s]

buffer size = 6068, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2835/2000001 [26:55<289:52:38,  1.91it/s]

buffer size = 6070, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2836/2000001 [26:56<288:35:47,  1.92it/s]

buffer size = 6072, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2837/2000001 [26:56<288:50:49,  1.92it/s]

buffer size = 6074, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2838/2000001 [26:57<321:49:31,  1.72it/s]

buffer size = 6076, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2839/2000001 [26:58<361:51:04,  1.53it/s]

buffer size = 6078, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2840/2000001 [26:59<389:21:42,  1.42it/s]

buffer size = 6080, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2841/2000001 [26:59<379:08:20,  1.46it/s]

buffer size = 6082, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2842/2000001 [27:00<349:01:37,  1.59it/s]

buffer size = 6084, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2843/2000001 [27:01<332:30:27,  1.67it/s]

buffer size = 6086, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2844/2000001 [27:01<319:09:11,  1.74it/s]

buffer size = 6088, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2845/2000001 [27:02<309:36:58,  1.79it/s]

buffer size = 6090, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2846/2000001 [27:02<300:49:04,  1.84it/s]

buffer size = 6092, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2847/2000001 [27:03<296:37:24,  1.87it/s]

buffer size = 6094, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2848/2000001 [27:03<291:41:57,  1.90it/s]

buffer size = 6096, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2849/2000001 [27:04<289:43:27,  1.91it/s]

buffer size = 6098, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2850/2000001 [27:04<289:30:51,  1.92it/s]

buffer size = 6100, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2851/2000001 [27:05<292:17:34,  1.90it/s]

buffer size = 6102, epsilon = 0.09858
mean_reward :  0.0


  0%|          | 2852/2000001 [27:05<287:54:29,  1.93it/s]

buffer size = 6104, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2853/2000001 [27:06<289:40:28,  1.92it/s]

buffer size = 6106, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2854/2000001 [27:06<287:34:12,  1.93it/s]

buffer size = 6108, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2855/2000001 [27:07<291:39:06,  1.90it/s]

buffer size = 6110, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2856/2000001 [27:07<290:11:07,  1.91it/s]

buffer size = 6112, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2857/2000001 [27:08<290:58:10,  1.91it/s]

buffer size = 6114, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2858/2000001 [27:08<290:07:27,  1.91it/s]

buffer size = 6116, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2859/2000001 [27:09<291:13:22,  1.90it/s]

buffer size = 6118, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2860/2000001 [27:09<300:44:28,  1.84it/s]

buffer size = 6120, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2861/2000001 [27:10<351:27:58,  1.58it/s]

buffer size = 6122, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2862/2000001 [27:11<388:13:10,  1.43it/s]

buffer size = 6124, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2863/2000001 [27:12<397:25:57,  1.40it/s]

buffer size = 6126, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2864/2000001 [27:12<364:25:30,  1.52it/s]

buffer size = 6128, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2865/2000001 [27:13<343:02:25,  1.62it/s]

buffer size = 6130, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2866/2000001 [27:13<326:32:14,  1.70it/s]

buffer size = 6132, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2867/2000001 [27:14<320:03:17,  1.73it/s]

buffer size = 6134, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2868/2000001 [27:14<307:04:41,  1.81it/s]

buffer size = 6136, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2869/2000001 [27:15<302:03:43,  1.84it/s]

buffer size = 6138, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2870/2000001 [27:16<299:23:20,  1.85it/s]

buffer size = 6140, epsilon = 0.09857
mean_reward :  0.0


  0%|          | 2871/2000001 [27:16<296:19:59,  1.87it/s]

buffer size = 6142, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2872/2000001 [27:17<291:44:25,  1.90it/s]

buffer size = 6144, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2873/2000001 [27:17<292:37:24,  1.90it/s]

buffer size = 6146, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2874/2000001 [27:18<288:08:32,  1.93it/s]

buffer size = 6148, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2875/2000001 [27:18<291:11:34,  1.91it/s]

buffer size = 6150, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2876/2000001 [27:19<288:43:18,  1.92it/s]

buffer size = 6152, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2877/2000001 [27:19<292:21:00,  1.90it/s]

buffer size = 6154, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2878/2000001 [27:20<292:39:29,  1.90it/s]

buffer size = 6156, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2879/2000001 [27:20<292:49:28,  1.89it/s]

buffer size = 6158, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2880/2000001 [27:21<289:54:55,  1.91it/s]

buffer size = 6160, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2881/2000001 [27:21<290:18:30,  1.91it/s]

buffer size = 6162, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2882/2000001 [27:22<289:09:00,  1.92it/s]

buffer size = 6164, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2883/2000001 [27:23<338:41:56,  1.64it/s]

buffer size = 6166, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2884/2000001 [27:23<378:40:01,  1.47it/s]

buffer size = 6168, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2885/2000001 [27:24<392:25:18,  1.41it/s]

buffer size = 6170, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2886/2000001 [27:25<358:46:58,  1.55it/s]

buffer size = 6172, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2887/2000001 [27:25<335:22:54,  1.65it/s]

buffer size = 6174, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2888/2000001 [27:26<319:51:11,  1.73it/s]

buffer size = 6176, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2889/2000001 [27:26<308:22:32,  1.80it/s]

buffer size = 6178, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2890/2000001 [27:27<303:06:01,  1.83it/s]

buffer size = 6180, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2891/2000001 [27:27<298:10:17,  1.86it/s]

buffer size = 6182, epsilon = 0.09856
mean_reward :  0.0


  0%|          | 2892/2000001 [27:28<293:05:35,  1.89it/s]

buffer size = 6184, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2893/2000001 [27:28<292:38:39,  1.90it/s]

buffer size = 6186, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2894/2000001 [27:29<292:52:10,  1.89it/s]

buffer size = 6188, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2895/2000001 [27:29<291:55:39,  1.90it/s]

buffer size = 6190, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2896/2000001 [27:30<292:27:10,  1.90it/s]

buffer size = 6192, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2897/2000001 [27:30<288:35:44,  1.92it/s]

buffer size = 6194, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2898/2000001 [27:31<290:47:58,  1.91it/s]

buffer size = 6196, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2899/2000001 [27:31<289:23:08,  1.92it/s]

buffer size = 6198, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2900/2000001 [27:32<289:06:40,  1.92it/s]

buffer size = 6200, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2901/2000001 [27:32<286:08:08,  1.94it/s]

buffer size = 6202, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2902/2000001 [27:33<288:13:10,  1.92it/s]

buffer size = 6204, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2903/2000001 [27:34<286:10:08,  1.94it/s]

buffer size = 6206, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2904/2000001 [27:34<286:47:44,  1.93it/s]

buffer size = 6208, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2905/2000001 [27:35<331:21:47,  1.67it/s]

buffer size = 6210, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2906/2000001 [27:36<373:39:37,  1.48it/s]

buffer size = 6212, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2907/2000001 [27:36<397:10:35,  1.40it/s]

buffer size = 6214, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2908/2000001 [27:37<365:36:43,  1.52it/s]

buffer size = 6216, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2909/2000001 [27:38<339:38:16,  1.63it/s]

buffer size = 6218, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2910/2000001 [27:38<325:33:55,  1.70it/s]

buffer size = 6220, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2911/2000001 [27:39<309:52:47,  1.79it/s]

buffer size = 6222, epsilon = 0.09855
mean_reward :  0.0


  0%|          | 2912/2000001 [27:39<307:23:33,  1.80it/s]

buffer size = 6224, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2913/2000001 [27:40<300:24:06,  1.85it/s]

buffer size = 6226, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2914/2000001 [27:40<296:50:39,  1.87it/s]

buffer size = 6228, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2915/2000001 [27:41<291:04:51,  1.91it/s]

buffer size = 6230, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2916/2000001 [27:41<290:17:47,  1.91it/s]

buffer size = 6232, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2917/2000001 [27:42<286:19:03,  1.94it/s]

buffer size = 6234, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2918/2000001 [27:42<288:22:55,  1.92it/s]

buffer size = 6236, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2919/2000001 [27:43<286:09:54,  1.94it/s]

buffer size = 6238, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2920/2000001 [27:43<288:16:16,  1.92it/s]

buffer size = 6240, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2921/2000001 [27:44<285:10:45,  1.95it/s]

buffer size = 6242, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2922/2000001 [27:44<286:32:38,  1.94it/s]

buffer size = 6244, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2923/2000001 [27:45<284:56:47,  1.95it/s]

buffer size = 6246, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2924/2000001 [27:45<288:18:57,  1.92it/s]

buffer size = 6248, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2925/2000001 [27:46<285:14:16,  1.94it/s]

buffer size = 6250, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2926/2000001 [27:46<285:32:27,  1.94it/s]

buffer size = 6252, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2927/2000001 [27:47<319:47:10,  1.73it/s]

buffer size = 6254, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2928/2000001 [27:48<361:11:34,  1.54it/s]

buffer size = 6256, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2929/2000001 [27:49<392:24:18,  1.41it/s]

buffer size = 6258, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2930/2000001 [27:49<377:46:06,  1.47it/s]

buffer size = 6260, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2931/2000001 [27:50<348:28:04,  1.59it/s]

buffer size = 6262, epsilon = 0.09854
mean_reward :  0.0


  0%|          | 2932/2000001 [27:50<329:40:59,  1.68it/s]

buffer size = 6264, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2933/2000001 [27:51<313:53:23,  1.77it/s]

buffer size = 6266, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2934/2000001 [27:51<307:06:53,  1.81it/s]

buffer size = 6268, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2935/2000001 [27:52<299:19:15,  1.85it/s]

buffer size = 6270, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2936/2000001 [27:52<296:07:55,  1.87it/s]

buffer size = 6272, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2937/2000001 [27:53<291:19:26,  1.90it/s]

buffer size = 6274, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2938/2000001 [27:53<289:36:09,  1.92it/s]

buffer size = 6276, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2939/2000001 [27:54<285:53:49,  1.94it/s]

buffer size = 6278, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2940/2000001 [27:54<288:43:59,  1.92it/s]

buffer size = 6280, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2941/2000001 [27:55<285:44:42,  1.94it/s]

buffer size = 6282, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2942/2000001 [27:55<286:17:20,  1.94it/s]

buffer size = 6284, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2943/2000001 [27:56<285:30:28,  1.94it/s]

buffer size = 6286, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2944/2000001 [27:56<286:28:02,  1.94it/s]

buffer size = 6288, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2945/2000001 [27:57<285:43:51,  1.94it/s]

buffer size = 6290, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2946/2000001 [27:57<287:42:14,  1.93it/s]

buffer size = 6292, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2947/2000001 [27:58<284:09:30,  1.95it/s]

buffer size = 6294, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2948/2000001 [27:59<285:35:53,  1.94it/s]

buffer size = 6296, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2949/2000001 [27:59<297:04:49,  1.87it/s]

buffer size = 6298, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2950/2000001 [28:00<346:52:29,  1.60it/s]

buffer size = 6300, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2951/2000001 [28:01<377:59:57,  1.47it/s]

buffer size = 6302, epsilon = 0.09853
mean_reward :  0.0


  0%|          | 2952/2000001 [28:01<384:55:24,  1.44it/s]

buffer size = 6304, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2953/2000001 [28:02<356:16:53,  1.56it/s]

buffer size = 6306, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2954/2000001 [28:03<335:11:01,  1.66it/s]

buffer size = 6308, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2955/2000001 [28:03<320:18:16,  1.73it/s]

buffer size = 6310, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2956/2000001 [28:04<310:33:46,  1.79it/s]

buffer size = 6312, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2957/2000001 [28:04<304:31:11,  1.82it/s]

buffer size = 6314, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2958/2000001 [28:05<298:11:59,  1.86it/s]

buffer size = 6316, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2959/2000001 [28:05<295:53:18,  1.87it/s]

buffer size = 6318, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2960/2000001 [28:06<290:59:36,  1.91it/s]

buffer size = 6320, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2961/2000001 [28:06<288:36:40,  1.92it/s]

buffer size = 6322, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2962/2000001 [28:07<287:43:11,  1.93it/s]

buffer size = 6324, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2963/2000001 [28:07<286:58:44,  1.93it/s]

buffer size = 6326, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2964/2000001 [28:08<283:48:21,  1.95it/s]

buffer size = 6328, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2965/2000001 [28:08<284:31:16,  1.95it/s]

buffer size = 6330, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2966/2000001 [28:09<283:34:41,  1.96it/s]

buffer size = 6332, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2967/2000001 [28:09<286:58:52,  1.93it/s]

buffer size = 6334, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2968/2000001 [28:10<286:36:28,  1.94it/s]

buffer size = 6336, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2969/2000001 [28:10<286:36:50,  1.94it/s]

buffer size = 6338, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2970/2000001 [28:11<284:30:10,  1.95it/s]

buffer size = 6340, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2971/2000001 [28:11<285:18:26,  1.94it/s]

buffer size = 6342, epsilon = 0.09852
mean_reward :  0.0


  0%|          | 2972/2000001 [28:12<327:25:16,  1.69it/s]

buffer size = 6344, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2973/2000001 [28:13<365:29:52,  1.52it/s]

buffer size = 6346, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2974/2000001 [28:14<393:13:31,  1.41it/s]

buffer size = 6348, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2975/2000001 [28:14<374:01:02,  1.48it/s]

buffer size = 6350, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2976/2000001 [28:15<346:55:40,  1.60it/s]

buffer size = 6352, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2977/2000001 [28:15<330:57:16,  1.68it/s]

buffer size = 6354, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2978/2000001 [28:16<316:13:11,  1.75it/s]

buffer size = 6356, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2979/2000001 [28:16<307:00:12,  1.81it/s]

buffer size = 6358, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2980/2000001 [28:17<300:06:37,  1.85it/s]

buffer size = 6360, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2981/2000001 [28:17<297:35:12,  1.86it/s]

buffer size = 6362, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2982/2000001 [28:18<293:19:04,  1.89it/s]

buffer size = 6364, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2983/2000001 [28:18<293:15:58,  1.89it/s]

buffer size = 6366, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2984/2000001 [28:19<289:34:29,  1.92it/s]

buffer size = 6368, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2985/2000001 [28:19<295:32:36,  1.88it/s]

buffer size = 6370, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2986/2000001 [28:20<296:12:31,  1.87it/s]

buffer size = 6372, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2987/2000001 [28:21<295:08:53,  1.88it/s]

buffer size = 6374, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2988/2000001 [28:21<293:36:39,  1.89it/s]

buffer size = 6376, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2989/2000001 [28:22<292:14:58,  1.90it/s]

buffer size = 6378, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2990/2000001 [28:22<289:45:28,  1.91it/s]

buffer size = 6380, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2991/2000001 [28:23<289:15:18,  1.92it/s]

buffer size = 6382, epsilon = 0.09851
mean_reward :  0.0


  0%|          | 2992/2000001 [28:23<287:58:40,  1.93it/s]

buffer size = 6384, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 2993/2000001 [28:24<287:28:34,  1.93it/s]

buffer size = 6386, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 2994/2000001 [28:24<306:50:28,  1.81it/s]

buffer size = 6388, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 2995/2000001 [28:25<357:05:13,  1.55it/s]

buffer size = 6390, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 2996/2000001 [28:26<380:56:24,  1.46it/s]

buffer size = 6392, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 2997/2000001 [28:27<383:38:36,  1.45it/s]

buffer size = 6394, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 2998/2000001 [28:27<355:37:17,  1.56it/s]

buffer size = 6396, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 2999/2000001 [28:28<331:23:46,  1.67it/s]

buffer size = 6398, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3000/2000001 [28:28<318:20:10,  1.74it/s]

buffer size = 6400, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3001/2000001 [28:29<309:22:21,  1.79it/s]

buffer size = 6402, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3002/2000001 [28:29<302:46:53,  1.83it/s]

buffer size = 6404, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3003/2000001 [28:30<297:14:49,  1.87it/s]

buffer size = 6406, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3004/2000001 [28:30<296:04:20,  1.87it/s]

buffer size = 6408, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3005/2000001 [28:31<294:46:01,  1.88it/s]

buffer size = 6410, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3006/2000001 [28:31<292:23:24,  1.90it/s]

buffer size = 6412, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3007/2000001 [28:32<289:11:48,  1.92it/s]

buffer size = 6414, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3008/2000001 [28:32<291:31:56,  1.90it/s]

buffer size = 6416, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3009/2000001 [28:33<290:59:57,  1.91it/s]

buffer size = 6418, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3010/2000001 [28:33<289:40:49,  1.91it/s]

buffer size = 6420, epsilon = 0.09850
mean_reward :  0.0


  0%|          | 3011/2000001 [28:34<286:28:10,  1.94it/s]

buffer size = 6422, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3012/2000001 [28:34<290:44:15,  1.91it/s]

buffer size = 6424, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3013/2000001 [28:35<289:18:46,  1.92it/s]

buffer size = 6426, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3014/2000001 [28:35<288:56:08,  1.92it/s]

buffer size = 6428, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3015/2000001 [28:36<287:44:38,  1.93it/s]

buffer size = 6430, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3016/2000001 [28:36<289:53:31,  1.91it/s]

buffer size = 6432, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3017/2000001 [28:37<338:03:56,  1.64it/s]

buffer size = 6434, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3018/2000001 [28:38<366:37:59,  1.51it/s]

buffer size = 6436, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3019/2000001 [28:39<398:26:15,  1.39it/s]

buffer size = 6438, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3020/2000001 [28:39<373:01:32,  1.49it/s]

buffer size = 6440, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3021/2000001 [28:40<344:55:14,  1.61it/s]

buffer size = 6442, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3022/2000001 [28:41<334:08:00,  1.66it/s]

buffer size = 6444, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3023/2000001 [28:41<322:25:06,  1.72it/s]

buffer size = 6446, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3024/2000001 [28:42<313:46:04,  1.77it/s]

buffer size = 6448, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3025/2000001 [28:42<304:10:52,  1.82it/s]

buffer size = 6450, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3026/2000001 [28:43<301:27:20,  1.84it/s]

buffer size = 6452, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3027/2000001 [28:43<295:30:30,  1.88it/s]

buffer size = 6454, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3028/2000001 [28:44<297:35:53,  1.86it/s]

buffer size = 6456, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3029/2000001 [28:44<295:49:10,  1.88it/s]

buffer size = 6458, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3030/2000001 [28:45<297:36:01,  1.86it/s]

buffer size = 6460, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3031/2000001 [28:45<294:26:22,  1.88it/s]

buffer size = 6462, epsilon = 0.09849
mean_reward :  0.0


  0%|          | 3032/2000001 [28:46<294:37:37,  1.88it/s]

buffer size = 6464, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3033/2000001 [28:46<292:35:04,  1.90it/s]

buffer size = 6466, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3034/2000001 [28:47<291:36:17,  1.90it/s]

buffer size = 6468, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3035/2000001 [28:47<289:49:48,  1.91it/s]

buffer size = 6470, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3036/2000001 [28:48<288:49:24,  1.92it/s]

buffer size = 6472, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3037/2000001 [28:48<293:46:07,  1.89it/s]

buffer size = 6474, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3038/2000001 [28:49<292:04:25,  1.90it/s]

buffer size = 6476, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3039/2000001 [28:50<340:17:33,  1.63it/s]

buffer size = 6478, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3040/2000001 [28:51<367:50:25,  1.51it/s]

buffer size = 6480, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3041/2000001 [28:51<391:51:34,  1.42it/s]

buffer size = 6482, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3042/2000001 [28:52<377:05:04,  1.47it/s]

buffer size = 6484, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3043/2000001 [28:52<347:03:57,  1.60it/s]

buffer size = 6486, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3044/2000001 [28:53<331:07:39,  1.68it/s]

buffer size = 6488, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3045/2000001 [28:54<316:36:56,  1.75it/s]

buffer size = 6490, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3046/2000001 [28:54<309:55:58,  1.79it/s]

buffer size = 6492, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3047/2000001 [28:55<309:03:51,  1.79it/s]

buffer size = 6494, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3048/2000001 [28:55<303:45:14,  1.83it/s]

buffer size = 6496, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3049/2000001 [28:56<299:33:39,  1.85it/s]

buffer size = 6498, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3050/2000001 [28:56<294:51:43,  1.88it/s]

buffer size = 6500, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3051/2000001 [28:57<293:03:41,  1.89it/s]

buffer size = 6502, epsilon = 0.09848
mean_reward :  0.0


  0%|          | 3052/2000001 [28:57<289:58:32,  1.91it/s]

buffer size = 6504, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3053/2000001 [28:58<289:52:08,  1.91it/s]

buffer size = 6506, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3054/2000001 [28:58<290:23:50,  1.91it/s]

buffer size = 6508, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3055/2000001 [28:59<290:39:10,  1.91it/s]

buffer size = 6510, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3056/2000001 [28:59<288:56:34,  1.92it/s]

buffer size = 6512, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3057/2000001 [29:00<287:52:14,  1.93it/s]

buffer size = 6514, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3058/2000001 [29:00<287:42:25,  1.93it/s]

buffer size = 6516, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3059/2000001 [29:01<287:46:30,  1.93it/s]

buffer size = 6518, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3060/2000001 [29:01<286:17:39,  1.94it/s]

buffer size = 6520, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3061/2000001 [29:02<299:54:20,  1.85it/s]

buffer size = 6522, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3062/2000001 [29:03<353:28:52,  1.57it/s]

buffer size = 6524, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3063/2000001 [29:04<392:36:16,  1.41it/s]

buffer size = 6526, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3064/2000001 [29:04<388:35:43,  1.43it/s]

buffer size = 6528, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3065/2000001 [29:05<360:08:46,  1.54it/s]

buffer size = 6530, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3066/2000001 [29:05<337:42:56,  1.64it/s]

buffer size = 6532, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3067/2000001 [29:06<324:51:48,  1.71it/s]

buffer size = 6534, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3068/2000001 [29:06<315:38:18,  1.76it/s]

buffer size = 6536, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3069/2000001 [29:07<306:17:44,  1.81it/s]

buffer size = 6538, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3070/2000001 [29:08<303:45:23,  1.83it/s]

buffer size = 6540, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3071/2000001 [29:08<297:52:21,  1.86it/s]

buffer size = 6542, epsilon = 0.09847
mean_reward :  0.0


  0%|          | 3072/2000001 [29:09<297:19:10,  1.87it/s]

buffer size = 6544, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3073/2000001 [29:09<295:28:31,  1.88it/s]

buffer size = 6546, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3074/2000001 [29:10<294:15:22,  1.89it/s]

buffer size = 6548, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3075/2000001 [29:10<293:02:47,  1.89it/s]

buffer size = 6550, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3076/2000001 [29:11<292:55:14,  1.89it/s]

buffer size = 6552, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3077/2000001 [29:11<290:07:35,  1.91it/s]

buffer size = 6554, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3078/2000001 [29:12<293:18:26,  1.89it/s]

buffer size = 6556, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3079/2000001 [29:12<291:49:42,  1.90it/s]

buffer size = 6558, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3080/2000001 [29:13<293:36:12,  1.89it/s]

buffer size = 6560, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3081/2000001 [29:13<289:46:10,  1.91it/s]

buffer size = 6562, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3082/2000001 [29:14<292:10:58,  1.90it/s]

buffer size = 6564, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3083/2000001 [29:14<305:59:11,  1.81it/s]

buffer size = 6566, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3084/2000001 [29:15<348:47:56,  1.59it/s]

buffer size = 6568, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3085/2000001 [29:16<375:24:21,  1.48it/s]

buffer size = 6570, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3086/2000001 [29:17<393:36:22,  1.41it/s]

buffer size = 6572, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3087/2000001 [29:17<363:43:54,  1.53it/s]

buffer size = 6574, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3088/2000001 [29:18<339:45:08,  1.63it/s]

buffer size = 6576, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3089/2000001 [29:18<323:55:28,  1.71it/s]

buffer size = 6578, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3090/2000001 [29:19<320:47:42,  1.73it/s]

buffer size = 6580, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3091/2000001 [29:19<315:15:04,  1.76it/s]

buffer size = 6582, epsilon = 0.09846
mean_reward :  0.0


  0%|          | 3092/2000001 [29:20<309:14:07,  1.79it/s]

buffer size = 6584, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3093/2000001 [29:21<303:11:36,  1.83it/s]

buffer size = 6586, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3094/2000001 [29:21<302:16:54,  1.84it/s]

buffer size = 6588, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3095/2000001 [29:22<299:45:26,  1.85it/s]

buffer size = 6590, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3096/2000001 [29:22<299:46:27,  1.85it/s]

buffer size = 6592, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3097/2000001 [29:23<296:24:05,  1.87it/s]

buffer size = 6594, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3098/2000001 [29:23<292:44:39,  1.89it/s]

buffer size = 6596, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3099/2000001 [29:24<288:33:54,  1.92it/s]

buffer size = 6598, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3100/2000001 [29:24<290:28:42,  1.91it/s]

buffer size = 6600, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3101/2000001 [29:25<290:06:22,  1.91it/s]

buffer size = 6602, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3102/2000001 [29:25<291:49:59,  1.90it/s]

buffer size = 6604, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3103/2000001 [29:26<289:50:29,  1.91it/s]

buffer size = 6606, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3104/2000001 [29:26<289:19:58,  1.92it/s]

buffer size = 6608, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3105/2000001 [29:27<327:28:02,  1.69it/s]

buffer size = 6610, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3106/2000001 [29:28<392:16:00,  1.41it/s]

buffer size = 6612, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3107/2000001 [29:29<409:26:05,  1.35it/s]

buffer size = 6614, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3108/2000001 [29:30<418:35:18,  1.33it/s]

buffer size = 6616, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3109/2000001 [29:30<382:40:39,  1.45it/s]

buffer size = 6618, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3110/2000001 [29:31<356:19:34,  1.56it/s]

buffer size = 6620, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3111/2000001 [29:31<335:51:27,  1.65it/s]

buffer size = 6622, epsilon = 0.09845
mean_reward :  0.0


  0%|          | 3112/2000001 [29:32<322:45:26,  1.72it/s]

buffer size = 6624, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3113/2000001 [29:32<312:21:29,  1.78it/s]

buffer size = 6626, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3114/2000001 [29:33<306:22:24,  1.81it/s]

buffer size = 6628, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3115/2000001 [29:33<301:00:20,  1.84it/s]

buffer size = 6630, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3116/2000001 [29:34<298:53:56,  1.86it/s]

buffer size = 6632, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3117/2000001 [29:34<299:00:06,  1.86it/s]

buffer size = 6634, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3118/2000001 [29:35<296:09:07,  1.87it/s]

buffer size = 6636, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3119/2000001 [29:35<292:09:50,  1.90it/s]

buffer size = 6638, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3120/2000001 [29:36<291:54:51,  1.90it/s]

buffer size = 6640, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3121/2000001 [29:36<292:57:01,  1.89it/s]

buffer size = 6642, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3122/2000001 [29:37<297:49:55,  1.86it/s]

buffer size = 6644, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3123/2000001 [29:38<295:15:10,  1.88it/s]

buffer size = 6646, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3124/2000001 [29:38<296:48:41,  1.87it/s]

buffer size = 6648, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3125/2000001 [29:39<294:24:32,  1.88it/s]

buffer size = 6650, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3126/2000001 [29:39<296:28:40,  1.87it/s]

buffer size = 6652, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3127/2000001 [29:40<300:08:46,  1.85it/s]

buffer size = 6654, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3128/2000001 [29:41<352:02:06,  1.58it/s]

buffer size = 6656, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3129/2000001 [29:41<379:00:07,  1.46it/s]

buffer size = 6658, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3130/2000001 [29:42<401:24:23,  1.38it/s]

buffer size = 6660, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3131/2000001 [29:43<369:20:36,  1.50it/s]

buffer size = 6662, epsilon = 0.09844
mean_reward :  0.0


  0%|          | 3132/2000001 [29:43<346:27:40,  1.60it/s]

buffer size = 6664, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3133/2000001 [29:44<333:33:11,  1.66it/s]

buffer size = 6666, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3134/2000001 [29:44<322:27:10,  1.72it/s]

buffer size = 6668, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3135/2000001 [29:45<312:13:45,  1.78it/s]

buffer size = 6670, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3136/2000001 [29:45<310:22:17,  1.79it/s]

buffer size = 6672, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3137/2000001 [29:46<302:55:22,  1.83it/s]

buffer size = 6674, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3138/2000001 [29:46<299:25:41,  1.85it/s]

buffer size = 6676, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3139/2000001 [29:47<295:59:48,  1.87it/s]

buffer size = 6678, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3140/2000001 [29:47<295:03:05,  1.88it/s]

buffer size = 6680, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3141/2000001 [29:48<292:37:10,  1.90it/s]

buffer size = 6682, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3142/2000001 [29:49<293:02:50,  1.89it/s]

buffer size = 6684, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3143/2000001 [29:49<292:07:35,  1.90it/s]

buffer size = 6686, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3144/2000001 [29:50<298:42:44,  1.86it/s]

buffer size = 6688, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3145/2000001 [29:50<293:53:52,  1.89it/s]

buffer size = 6690, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3146/2000001 [29:51<293:14:48,  1.89it/s]

buffer size = 6692, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3147/2000001 [29:51<293:04:38,  1.89it/s]

buffer size = 6694, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3148/2000001 [29:52<295:16:04,  1.88it/s]

buffer size = 6696, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3149/2000001 [29:52<300:20:26,  1.85it/s]

buffer size = 6698, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3150/2000001 [29:53<350:46:02,  1.58it/s]

buffer size = 6700, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3151/2000001 [29:54<394:14:01,  1.41it/s]

buffer size = 6702, epsilon = 0.09843
mean_reward :  0.0


  0%|          | 3152/2000001 [29:55<403:30:30,  1.37it/s]

buffer size = 6704, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3153/2000001 [29:55<369:06:55,  1.50it/s]

buffer size = 6706, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3154/2000001 [29:56<344:40:12,  1.61it/s]

buffer size = 6708, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3155/2000001 [29:56<326:05:49,  1.70it/s]

buffer size = 6710, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3156/2000001 [29:57<319:26:49,  1.74it/s]

buffer size = 6712, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3157/2000001 [29:57<311:23:20,  1.78it/s]

buffer size = 6714, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3158/2000001 [29:58<306:21:38,  1.81it/s]

buffer size = 6716, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3159/2000001 [29:58<299:20:57,  1.85it/s]

buffer size = 6718, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3160/2000001 [29:59<300:14:46,  1.85it/s]

buffer size = 6720, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3161/2000001 [30:00<299:12:13,  1.85it/s]

buffer size = 6722, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3162/2000001 [30:00<304:38:14,  1.82it/s]

buffer size = 6724, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3163/2000001 [30:01<300:10:50,  1.85it/s]

buffer size = 6726, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3164/2000001 [30:01<295:17:05,  1.88it/s]

buffer size = 6728, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3165/2000001 [30:02<293:25:13,  1.89it/s]

buffer size = 6730, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3166/2000001 [30:02<296:10:03,  1.87it/s]

buffer size = 6732, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3167/2000001 [30:03<293:41:28,  1.89it/s]

buffer size = 6734, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3168/2000001 [30:03<293:32:41,  1.89it/s]

buffer size = 6736, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3169/2000001 [30:04<293:42:00,  1.89it/s]

buffer size = 6738, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3170/2000001 [30:04<296:13:33,  1.87it/s]

buffer size = 6740, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3171/2000001 [30:05<325:35:05,  1.70it/s]

buffer size = 6742, epsilon = 0.09842
mean_reward :  0.0


  0%|          | 3172/2000001 [30:06<375:06:06,  1.48it/s]

buffer size = 6744, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3173/2000001 [30:07<412:26:36,  1.34it/s]

buffer size = 6746, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3174/2000001 [30:07<396:52:54,  1.40it/s]

buffer size = 6748, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3175/2000001 [30:08<369:55:42,  1.50it/s]

buffer size = 6750, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3176/2000001 [30:09<345:13:47,  1.61it/s]

buffer size = 6752, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3177/2000001 [30:09<329:46:31,  1.68it/s]

buffer size = 6754, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3178/2000001 [30:10<319:47:37,  1.73it/s]

buffer size = 6756, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3179/2000001 [30:10<310:11:18,  1.79it/s]

buffer size = 6758, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3180/2000001 [30:11<304:09:25,  1.82it/s]

buffer size = 6760, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3181/2000001 [30:11<298:50:28,  1.86it/s]

buffer size = 6762, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3182/2000001 [30:12<296:59:08,  1.87it/s]

buffer size = 6764, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3183/2000001 [30:12<293:51:44,  1.89it/s]

buffer size = 6766, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3184/2000001 [30:13<297:56:57,  1.86it/s]

buffer size = 6768, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3185/2000001 [30:13<294:24:29,  1.88it/s]

buffer size = 6770, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3186/2000001 [30:14<295:36:25,  1.88it/s]

buffer size = 6772, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3187/2000001 [30:14<292:06:09,  1.90it/s]

buffer size = 6774, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3188/2000001 [30:15<296:10:39,  1.87it/s]

buffer size = 6776, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3189/2000001 [30:15<298:33:47,  1.86it/s]

buffer size = 6778, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3190/2000001 [30:16<296:13:47,  1.87it/s]

buffer size = 6780, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3191/2000001 [30:16<292:31:28,  1.90it/s]

buffer size = 6782, epsilon = 0.09841
mean_reward :  0.0


  0%|          | 3192/2000001 [30:17<296:52:10,  1.87it/s]

buffer size = 6784, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3193/2000001 [30:18<330:51:48,  1.68it/s]

buffer size = 6786, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3194/2000001 [30:19<364:59:43,  1.52it/s]

buffer size = 6788, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3195/2000001 [30:19<391:25:39,  1.42it/s]

buffer size = 6790, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3196/2000001 [30:20<392:24:51,  1.41it/s]

buffer size = 6792, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3197/2000001 [30:21<360:00:44,  1.54it/s]

buffer size = 6794, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3198/2000001 [30:21<341:01:07,  1.63it/s]

buffer size = 6796, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3199/2000001 [30:22<326:36:50,  1.70it/s]

buffer size = 6798, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3200/2000001 [30:22<317:52:13,  1.74it/s]

buffer size = 6800, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3201/2000001 [30:23<312:04:17,  1.78it/s]

buffer size = 6802, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3202/2000001 [30:23<308:34:23,  1.80it/s]

buffer size = 6804, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3203/2000001 [30:24<304:20:52,  1.82it/s]

buffer size = 6806, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3204/2000001 [30:24<302:14:10,  1.84it/s]

buffer size = 6808, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3205/2000001 [30:25<298:12:29,  1.86it/s]

buffer size = 6810, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3206/2000001 [30:25<300:25:01,  1.85it/s]

buffer size = 6812, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3207/2000001 [30:26<296:32:25,  1.87it/s]

buffer size = 6814, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3208/2000001 [30:26<297:26:22,  1.86it/s]

buffer size = 6816, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3209/2000001 [30:27<294:22:10,  1.88it/s]

buffer size = 6818, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3210/2000001 [30:28<297:15:06,  1.87it/s]

buffer size = 6820, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3211/2000001 [30:28<295:48:57,  1.88it/s]

buffer size = 6822, epsilon = 0.09840
mean_reward :  0.0


  0%|          | 3212/2000001 [30:29<296:57:56,  1.87it/s]

buffer size = 6824, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3213/2000001 [30:29<293:23:37,  1.89it/s]

buffer size = 6826, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3214/2000001 [30:30<293:23:10,  1.89it/s]

buffer size = 6828, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3215/2000001 [30:30<323:29:40,  1.71it/s]

buffer size = 6830, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3216/2000001 [30:31<364:59:04,  1.52it/s]

buffer size = 6832, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3217/2000001 [30:32<392:17:10,  1.41it/s]

buffer size = 6834, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3218/2000001 [30:33<386:46:25,  1.43it/s]

buffer size = 6836, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3219/2000001 [30:33<357:26:53,  1.55it/s]

buffer size = 6838, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3220/2000001 [30:34<338:32:16,  1.64it/s]

buffer size = 6840, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3221/2000001 [30:34<325:39:47,  1.70it/s]

buffer size = 6842, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3222/2000001 [30:35<317:21:12,  1.75it/s]

buffer size = 6844, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3223/2000001 [30:35<307:21:39,  1.80it/s]

buffer size = 6846, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3224/2000001 [30:36<302:32:33,  1.83it/s]

buffer size = 6848, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3225/2000001 [30:36<304:09:45,  1.82it/s]

buffer size = 6850, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3226/2000001 [30:37<304:04:38,  1.82it/s]

buffer size = 6852, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3227/2000001 [30:37<298:59:50,  1.86it/s]

buffer size = 6854, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3228/2000001 [30:38<299:58:51,  1.85it/s]

buffer size = 6856, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3229/2000001 [30:39<297:19:53,  1.87it/s]

buffer size = 6858, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3230/2000001 [30:39<297:45:19,  1.86it/s]

buffer size = 6860, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3231/2000001 [30:40<294:45:29,  1.88it/s]

buffer size = 6862, epsilon = 0.09839
mean_reward :  0.0


  0%|          | 3232/2000001 [30:40<294:52:53,  1.88it/s]

buffer size = 6864, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3233/2000001 [30:41<293:40:59,  1.89it/s]

buffer size = 6866, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3234/2000001 [30:41<292:15:40,  1.90it/s]

buffer size = 6868, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3235/2000001 [30:42<290:54:04,  1.91it/s]

buffer size = 6870, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3236/2000001 [30:42<292:14:33,  1.90it/s]

buffer size = 6872, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3237/2000001 [30:43<332:43:25,  1.67it/s]

buffer size = 6874, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3238/2000001 [30:44<373:30:29,  1.48it/s]

buffer size = 6876, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3239/2000001 [30:45<409:27:25,  1.35it/s]

buffer size = 6878, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3240/2000001 [30:45<379:33:43,  1.46it/s]

buffer size = 6880, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3241/2000001 [30:46<351:09:25,  1.58it/s]

buffer size = 6882, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3242/2000001 [30:46<333:47:05,  1.66it/s]

buffer size = 6884, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3243/2000001 [30:47<320:06:06,  1.73it/s]

buffer size = 6886, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3244/2000001 [30:47<314:33:01,  1.76it/s]

buffer size = 6888, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3245/2000001 [30:48<307:08:58,  1.81it/s]

buffer size = 6890, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3246/2000001 [30:48<303:09:23,  1.83it/s]

buffer size = 6892, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3247/2000001 [30:49<301:33:44,  1.84it/s]

buffer size = 6894, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3248/2000001 [30:50<302:50:55,  1.83it/s]

buffer size = 6896, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3249/2000001 [30:50<299:09:02,  1.85it/s]

buffer size = 6898, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3250/2000001 [30:51<294:37:29,  1.88it/s]

buffer size = 6900, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3251/2000001 [30:51<294:12:14,  1.89it/s]

buffer size = 6902, epsilon = 0.09838
mean_reward :  0.0


  0%|          | 3252/2000001 [30:52<296:10:39,  1.87it/s]

buffer size = 6904, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3253/2000001 [30:52<295:48:28,  1.88it/s]

buffer size = 6906, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3254/2000001 [30:53<295:35:55,  1.88it/s]

buffer size = 6908, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3255/2000001 [30:53<292:08:00,  1.90it/s]

buffer size = 6910, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3256/2000001 [30:54<294:32:32,  1.88it/s]

buffer size = 6912, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3257/2000001 [30:54<298:40:13,  1.86it/s]

buffer size = 6914, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3258/2000001 [30:55<305:42:34,  1.81it/s]

buffer size = 6916, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3259/2000001 [30:56<345:11:49,  1.61it/s]

buffer size = 6918, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3260/2000001 [30:56<369:31:17,  1.50it/s]

buffer size = 6920, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3261/2000001 [30:57<397:46:16,  1.39it/s]

buffer size = 6922, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3262/2000001 [30:58<386:41:07,  1.43it/s]

buffer size = 6924, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3263/2000001 [30:58<354:32:09,  1.56it/s]

buffer size = 6926, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3264/2000001 [30:59<340:07:38,  1.63it/s]

buffer size = 6928, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3265/2000001 [31:00<324:37:27,  1.71it/s]

buffer size = 6930, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3266/2000001 [31:00<318:04:37,  1.74it/s]

buffer size = 6932, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3267/2000001 [31:01<309:20:51,  1.79it/s]

buffer size = 6934, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3268/2000001 [31:01<305:12:24,  1.82it/s]

buffer size = 6936, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3269/2000001 [31:02<299:03:03,  1.85it/s]

buffer size = 6938, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3270/2000001 [31:02<300:42:06,  1.84it/s]

buffer size = 6940, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3271/2000001 [31:03<294:44:48,  1.88it/s]

buffer size = 6942, epsilon = 0.09837
mean_reward :  0.0


  0%|          | 3272/2000001 [31:03<295:15:33,  1.88it/s]

buffer size = 6944, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3273/2000001 [31:04<293:17:10,  1.89it/s]

buffer size = 6946, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3274/2000001 [31:04<298:17:05,  1.86it/s]

buffer size = 6948, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3275/2000001 [31:05<298:32:30,  1.86it/s]

buffer size = 6950, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3276/2000001 [31:05<298:50:05,  1.86it/s]

buffer size = 6952, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3277/2000001 [31:06<295:46:59,  1.88it/s]

buffer size = 6954, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3278/2000001 [31:06<299:40:53,  1.85it/s]

buffer size = 6956, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3279/2000001 [31:07<297:35:23,  1.86it/s]

buffer size = 6958, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3280/2000001 [31:08<298:22:44,  1.86it/s]

buffer size = 6960, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3281/2000001 [31:08<335:53:49,  1.65it/s]

buffer size = 6962, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3282/2000001 [31:09<380:49:01,  1.46it/s]

buffer size = 6964, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3283/2000001 [31:10<406:58:35,  1.36it/s]

buffer size = 6966, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3284/2000001 [31:11<384:27:47,  1.44it/s]

buffer size = 6968, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3285/2000001 [31:11<356:06:03,  1.56it/s]

buffer size = 6970, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3286/2000001 [31:12<337:05:01,  1.65it/s]

buffer size = 6972, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3287/2000001 [31:12<323:43:04,  1.71it/s]

buffer size = 6974, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3288/2000001 [31:13<316:17:42,  1.75it/s]

buffer size = 6976, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3289/2000001 [31:13<307:48:31,  1.80it/s]

buffer size = 6978, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3290/2000001 [31:14<304:19:48,  1.82it/s]

buffer size = 6980, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3291/2000001 [31:14<301:44:41,  1.84it/s]

buffer size = 6982, epsilon = 0.09836
mean_reward :  0.0


  0%|          | 3292/2000001 [31:15<299:38:31,  1.85it/s]

buffer size = 6984, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3293/2000001 [31:15<295:14:26,  1.88it/s]

buffer size = 6986, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3294/2000001 [31:16<297:47:58,  1.86it/s]

buffer size = 6988, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3295/2000001 [31:16<292:14:19,  1.90it/s]

buffer size = 6990, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3296/2000001 [31:17<292:50:53,  1.89it/s]

buffer size = 6992, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3297/2000001 [31:17<291:25:37,  1.90it/s]

buffer size = 6994, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3298/2000001 [31:18<293:36:19,  1.89it/s]

buffer size = 6996, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3299/2000001 [31:19<291:36:17,  1.90it/s]

buffer size = 6998, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3300/2000001 [31:19<293:47:58,  1.89it/s]

buffer size = 7000, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3301/2000001 [31:20<294:42:49,  1.88it/s]

buffer size = 7002, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3302/2000001 [31:20<298:49:40,  1.86it/s]

buffer size = 7004, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3303/2000001 [31:21<344:48:43,  1.61it/s]

buffer size = 7006, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3304/2000001 [31:22<374:32:32,  1.48it/s]

buffer size = 7008, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3305/2000001 [31:23<401:25:34,  1.38it/s]

buffer size = 7010, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3306/2000001 [31:23<382:44:25,  1.45it/s]

buffer size = 7012, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3307/2000001 [31:24<354:26:05,  1.56it/s]

buffer size = 7014, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3308/2000001 [31:24<338:09:47,  1.64it/s]

buffer size = 7016, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3309/2000001 [31:25<321:57:03,  1.72it/s]

buffer size = 7018, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3310/2000001 [31:25<312:50:13,  1.77it/s]

buffer size = 7020, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3311/2000001 [31:26<307:17:54,  1.80it/s]

buffer size = 7022, epsilon = 0.09835
mean_reward :  0.0


  0%|          | 3312/2000001 [31:26<302:41:09,  1.83it/s]

buffer size = 7024, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3313/2000001 [31:27<296:56:48,  1.87it/s]

buffer size = 7026, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3314/2000001 [31:27<298:38:59,  1.86it/s]

buffer size = 7028, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3315/2000001 [31:28<295:17:26,  1.88it/s]

buffer size = 7030, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3316/2000001 [31:28<296:33:42,  1.87it/s]

buffer size = 7032, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3317/2000001 [31:29<295:05:01,  1.88it/s]

buffer size = 7034, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3318/2000001 [31:30<293:07:45,  1.89it/s]

buffer size = 7036, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3319/2000001 [31:30<293:33:52,  1.89it/s]

buffer size = 7038, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3320/2000001 [31:31<293:32:09,  1.89it/s]

buffer size = 7040, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3321/2000001 [31:31<292:28:02,  1.90it/s]

buffer size = 7042, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3322/2000001 [31:32<293:47:19,  1.89it/s]

buffer size = 7044, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3323/2000001 [31:32<292:41:21,  1.89it/s]

buffer size = 7046, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3324/2000001 [31:33<295:41:31,  1.88it/s]

buffer size = 7048, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3325/2000001 [31:33<333:10:20,  1.66it/s]

buffer size = 7050, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3326/2000001 [31:34<372:59:35,  1.49it/s]

buffer size = 7052, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3327/2000001 [31:35<399:36:44,  1.39it/s]

buffer size = 7054, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3328/2000001 [31:36<376:32:53,  1.47it/s]

buffer size = 7056, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3329/2000001 [31:36<352:58:36,  1.57it/s]

buffer size = 7058, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3330/2000001 [31:37<336:38:07,  1.65it/s]

buffer size = 7060, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3331/2000001 [31:37<323:41:25,  1.71it/s]

buffer size = 7062, epsilon = 0.09834
mean_reward :  0.0


  0%|          | 3332/2000001 [31:38<315:57:02,  1.76it/s]

buffer size = 7064, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3333/2000001 [31:38<308:43:19,  1.80it/s]

buffer size = 7066, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3334/2000001 [31:39<307:55:37,  1.80it/s]

buffer size = 7068, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3335/2000001 [31:39<301:39:01,  1.84it/s]

buffer size = 7070, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3336/2000001 [31:40<300:09:08,  1.85it/s]

buffer size = 7072, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3337/2000001 [31:41<295:14:06,  1.88it/s]

buffer size = 7074, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3338/2000001 [31:41<294:29:04,  1.88it/s]

buffer size = 7076, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3339/2000001 [31:42<292:56:32,  1.89it/s]

buffer size = 7078, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3340/2000001 [31:42<295:02:10,  1.88it/s]

buffer size = 7080, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3341/2000001 [31:43<296:09:20,  1.87it/s]

buffer size = 7082, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3342/2000001 [31:43<296:35:47,  1.87it/s]

buffer size = 7084, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3343/2000001 [31:44<295:40:37,  1.88it/s]

buffer size = 7086, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3344/2000001 [31:44<293:04:11,  1.89it/s]

buffer size = 7088, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3345/2000001 [31:45<293:13:47,  1.89it/s]

buffer size = 7090, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3346/2000001 [31:45<294:41:42,  1.88it/s]

buffer size = 7092, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3347/2000001 [31:46<338:26:02,  1.64it/s]

buffer size = 7094, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3348/2000001 [31:47<386:05:33,  1.44it/s]

buffer size = 7096, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3349/2000001 [31:48<403:07:43,  1.38it/s]

buffer size = 7098, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3350/2000001 [31:48<377:24:10,  1.47it/s]

buffer size = 7100, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3351/2000001 [31:49<352:16:13,  1.57it/s]

buffer size = 7102, epsilon = 0.09833
mean_reward :  0.0


  0%|          | 3352/2000001 [31:49<342:58:04,  1.62it/s]

buffer size = 7104, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3353/2000001 [31:50<326:32:46,  1.70it/s]

buffer size = 7106, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3354/2000001 [31:51<316:14:25,  1.75it/s]

buffer size = 7108, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3355/2000001 [31:51<309:57:06,  1.79it/s]

buffer size = 7110, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3356/2000001 [31:52<307:29:15,  1.80it/s]

buffer size = 7112, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3357/2000001 [31:52<303:44:29,  1.83it/s]

buffer size = 7114, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3358/2000001 [31:53<304:49:18,  1.82it/s]

buffer size = 7116, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3359/2000001 [31:53<301:04:04,  1.84it/s]

buffer size = 7118, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3360/2000001 [31:54<299:13:47,  1.85it/s]

buffer size = 7120, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3361/2000001 [31:54<300:11:53,  1.85it/s]

buffer size = 7122, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3362/2000001 [31:55<301:13:01,  1.84it/s]

buffer size = 7124, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3363/2000001 [31:55<297:01:48,  1.87it/s]

buffer size = 7126, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3364/2000001 [31:56<297:17:42,  1.87it/s]

buffer size = 7128, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3365/2000001 [31:56<294:53:53,  1.88it/s]

buffer size = 7130, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3366/2000001 [31:57<297:45:44,  1.86it/s]

buffer size = 7132, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3367/2000001 [31:57<298:17:19,  1.86it/s]

buffer size = 7134, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3368/2000001 [31:58<310:55:46,  1.78it/s]

buffer size = 7136, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3369/2000001 [31:59<362:05:35,  1.53it/s]

buffer size = 7138, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3370/2000001 [32:00<404:12:35,  1.37it/s]

buffer size = 7140, epsilon = 0.09832
mean_reward :  0.0


  0%|          | 3371/2000001 [32:01<398:39:23,  1.39it/s]

buffer size = 7142, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3372/2000001 [32:01<369:50:17,  1.50it/s]

buffer size = 7144, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3373/2000001 [32:02<347:45:03,  1.59it/s]

buffer size = 7146, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3374/2000001 [32:02<336:01:55,  1.65it/s]

buffer size = 7148, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3375/2000001 [32:03<324:52:33,  1.71it/s]

buffer size = 7150, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3376/2000001 [32:03<317:56:20,  1.74it/s]

buffer size = 7152, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3377/2000001 [32:04<308:49:21,  1.80it/s]

buffer size = 7154, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3378/2000001 [32:04<307:18:43,  1.80it/s]

buffer size = 7156, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3379/2000001 [32:05<308:25:55,  1.80it/s]

buffer size = 7158, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3380/2000001 [32:05<305:13:35,  1.82it/s]

buffer size = 7160, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3381/2000001 [32:06<301:37:14,  1.84it/s]

buffer size = 7162, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3382/2000001 [32:07<302:56:46,  1.83it/s]

buffer size = 7164, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3383/2000001 [32:07<304:51:29,  1.82it/s]

buffer size = 7166, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3384/2000001 [32:08<298:31:09,  1.86it/s]

buffer size = 7168, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3385/2000001 [32:08<296:46:03,  1.87it/s]

buffer size = 7170, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3386/2000001 [32:09<292:52:00,  1.89it/s]

buffer size = 7172, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3387/2000001 [32:09<298:00:40,  1.86it/s]

buffer size = 7174, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3388/2000001 [32:10<297:48:09,  1.86it/s]

buffer size = 7176, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3389/2000001 [32:10<298:24:03,  1.86it/s]

buffer size = 7178, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3390/2000001 [32:11<343:05:27,  1.62it/s]

buffer size = 7180, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3391/2000001 [32:12<387:10:21,  1.43it/s]

buffer size = 7182, epsilon = 0.09831
mean_reward :  0.0


  0%|          | 3392/2000001 [32:13<410:47:05,  1.35it/s]

buffer size = 7184, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3393/2000001 [32:13<379:19:28,  1.46it/s]

buffer size = 7186, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3394/2000001 [32:14<353:40:33,  1.57it/s]

buffer size = 7188, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3395/2000001 [32:14<338:21:43,  1.64it/s]

buffer size = 7190, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3396/2000001 [32:15<328:06:01,  1.69it/s]

buffer size = 7192, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3397/2000001 [32:16<322:47:42,  1.72it/s]

buffer size = 7194, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3398/2000001 [32:16<314:27:18,  1.76it/s]

buffer size = 7196, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3399/2000001 [32:17<308:52:42,  1.80it/s]

buffer size = 7198, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3400/2000001 [32:17<306:24:32,  1.81it/s]

buffer size = 7200, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3401/2000001 [32:18<304:02:17,  1.82it/s]

buffer size = 7202, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3402/2000001 [32:18<303:45:56,  1.83it/s]

buffer size = 7204, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3403/2000001 [32:19<302:15:05,  1.83it/s]

buffer size = 7206, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3404/2000001 [32:19<308:07:42,  1.80it/s]

buffer size = 7208, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3405/2000001 [32:20<303:18:07,  1.83it/s]

buffer size = 7210, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3406/2000001 [32:20<299:54:17,  1.85it/s]

buffer size = 7212, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3407/2000001 [32:21<301:42:53,  1.84it/s]

buffer size = 7214, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3408/2000001 [32:22<303:23:27,  1.83it/s]

buffer size = 7216, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3409/2000001 [32:22<303:36:37,  1.83it/s]

buffer size = 7218, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3410/2000001 [32:23<299:32:35,  1.85it/s]

buffer size = 7220, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3411/2000001 [32:23<346:24:30,  1.60it/s]

buffer size = 7222, epsilon = 0.09830
mean_reward :  0.0


  0%|          | 3412/2000001 [32:24<391:41:40,  1.42it/s]

buffer size = 7224, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3413/2000001 [32:25<412:52:52,  1.34it/s]

buffer size = 7226, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3414/2000001 [32:26<386:29:52,  1.43it/s]

buffer size = 7228, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3415/2000001 [32:26<359:26:37,  1.54it/s]

buffer size = 7230, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3416/2000001 [32:27<339:41:20,  1.63it/s]

buffer size = 7232, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3417/2000001 [32:27<329:21:24,  1.68it/s]

buffer size = 7234, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3418/2000001 [32:28<318:29:28,  1.74it/s]

buffer size = 7236, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3419/2000001 [32:28<313:55:29,  1.77it/s]

buffer size = 7238, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3420/2000001 [32:29<310:19:52,  1.79it/s]

buffer size = 7240, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3421/2000001 [32:29<306:42:34,  1.81it/s]

buffer size = 7242, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3422/2000001 [32:30<307:00:02,  1.81it/s]

buffer size = 7244, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3423/2000001 [32:31<303:32:41,  1.83it/s]

buffer size = 7246, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3424/2000001 [32:31<301:45:45,  1.84it/s]

buffer size = 7248, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3425/2000001 [32:32<302:36:17,  1.83it/s]

buffer size = 7250, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3426/2000001 [32:32<304:46:21,  1.82it/s]

buffer size = 7252, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3427/2000001 [32:33<300:50:38,  1.84it/s]

buffer size = 7254, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3428/2000001 [32:33<303:04:27,  1.83it/s]

buffer size = 7256, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3429/2000001 [32:34<300:48:18,  1.84it/s]

buffer size = 7258, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3430/2000001 [32:34<302:50:38,  1.83it/s]

buffer size = 7260, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3431/2000001 [32:35<300:39:58,  1.84it/s]

buffer size = 7262, epsilon = 0.09829
mean_reward :  0.0


  0%|          | 3432/2000001 [32:36<309:54:14,  1.79it/s]

buffer size = 7264, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3433/2000001 [32:36<368:33:26,  1.50it/s]

buffer size = 7266, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3434/2000001 [32:37<386:50:11,  1.43it/s]

buffer size = 7268, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3435/2000001 [32:38<397:56:35,  1.39it/s]

buffer size = 7270, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3436/2000001 [32:39<367:33:13,  1.51it/s]

buffer size = 7272, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3437/2000001 [32:39<347:58:13,  1.59it/s]

buffer size = 7274, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3438/2000001 [32:40<335:15:44,  1.65it/s]

buffer size = 7276, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3439/2000001 [32:40<324:19:15,  1.71it/s]

buffer size = 7278, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3440/2000001 [32:41<319:30:02,  1.74it/s]

buffer size = 7280, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3441/2000001 [32:41<312:30:55,  1.77it/s]

buffer size = 7282, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3442/2000001 [32:42<312:00:28,  1.78it/s]

buffer size = 7284, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3443/2000001 [32:42<308:39:10,  1.80it/s]

buffer size = 7286, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3444/2000001 [32:43<306:28:35,  1.81it/s]

buffer size = 7288, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3445/2000001 [32:43<303:34:40,  1.83it/s]

buffer size = 7290, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3446/2000001 [32:44<304:20:40,  1.82it/s]

buffer size = 7292, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3447/2000001 [32:44<297:40:55,  1.86it/s]

buffer size = 7294, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3448/2000001 [32:45<298:42:41,  1.86it/s]

buffer size = 7296, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3449/2000001 [32:46<297:50:34,  1.86it/s]

buffer size = 7298, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3450/2000001 [32:46<297:52:26,  1.86it/s]

buffer size = 7300, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3451/2000001 [32:47<295:23:32,  1.88it/s]

buffer size = 7302, epsilon = 0.09828
mean_reward :  0.0


  0%|          | 3452/2000001 [32:47<297:16:29,  1.87it/s]

buffer size = 7304, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3453/2000001 [32:48<292:32:16,  1.90it/s]

buffer size = 7306, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3454/2000001 [32:48<333:18:21,  1.66it/s]

buffer size = 7308, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3455/2000001 [32:49<376:56:57,  1.47it/s]

buffer size = 7310, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3456/2000001 [32:50<397:23:14,  1.40it/s]

buffer size = 7312, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3457/2000001 [32:51<393:25:53,  1.41it/s]

buffer size = 7314, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3458/2000001 [32:51<367:59:20,  1.51it/s]

buffer size = 7316, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3459/2000001 [32:52<346:08:20,  1.60it/s]

buffer size = 7318, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3460/2000001 [32:52<332:45:51,  1.67it/s]

buffer size = 7320, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3461/2000001 [32:53<322:24:27,  1.72it/s]

buffer size = 7322, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3462/2000001 [32:54<316:38:51,  1.75it/s]

buffer size = 7324, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3463/2000001 [32:54<310:00:36,  1.79it/s]

buffer size = 7326, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3464/2000001 [32:55<310:54:34,  1.78it/s]

buffer size = 7328, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3465/2000001 [32:55<304:54:10,  1.82it/s]

buffer size = 7330, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3466/2000001 [32:56<304:24:44,  1.82it/s]

buffer size = 7332, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3467/2000001 [32:56<301:14:06,  1.84it/s]

buffer size = 7334, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3468/2000001 [32:57<299:34:46,  1.85it/s]

buffer size = 7336, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3469/2000001 [32:57<299:31:59,  1.85it/s]

buffer size = 7338, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3470/2000001 [32:58<302:12:56,  1.84it/s]

buffer size = 7340, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3471/2000001 [32:58<299:36:25,  1.85it/s]

buffer size = 7342, epsilon = 0.09827
mean_reward :  0.0


  0%|          | 3472/2000001 [32:59<301:50:04,  1.84it/s]

buffer size = 7344, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3473/2000001 [32:59<302:48:09,  1.83it/s]

buffer size = 7346, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3474/2000001 [33:00<297:10:59,  1.87it/s]

buffer size = 7348, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3475/2000001 [33:01<296:55:41,  1.87it/s]

buffer size = 7350, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3476/2000001 [33:01<336:23:52,  1.65it/s]

buffer size = 7352, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3477/2000001 [33:02<383:50:22,  1.44it/s]

buffer size = 7354, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3478/2000001 [33:03<413:31:25,  1.34it/s]

buffer size = 7356, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3479/2000001 [33:04<384:35:32,  1.44it/s]

buffer size = 7358, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3480/2000001 [33:04<358:52:07,  1.55it/s]

buffer size = 7360, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3481/2000001 [33:05<340:44:13,  1.63it/s]

buffer size = 7362, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3482/2000001 [33:05<329:55:23,  1.68it/s]

buffer size = 7364, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3483/2000001 [33:06<315:37:52,  1.76it/s]

buffer size = 7366, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3484/2000001 [33:06<309:47:44,  1.79it/s]

buffer size = 7368, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3485/2000001 [33:07<303:15:13,  1.83it/s]

buffer size = 7370, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3486/2000001 [33:07<299:31:24,  1.85it/s]

buffer size = 7372, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3487/2000001 [33:08<293:27:25,  1.89it/s]

buffer size = 7374, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3488/2000001 [33:08<294:12:29,  1.89it/s]

buffer size = 7376, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3489/2000001 [33:09<291:28:56,  1.90it/s]

buffer size = 7378, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3490/2000001 [33:09<292:46:45,  1.89it/s]

buffer size = 7380, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3491/2000001 [33:10<290:32:01,  1.91it/s]

buffer size = 7382, epsilon = 0.09826
mean_reward :  0.0


  0%|          | 3492/2000001 [33:10<288:38:22,  1.92it/s]

buffer size = 7384, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3493/2000001 [33:11<286:50:50,  1.93it/s]

buffer size = 7386, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3494/2000001 [33:11<287:54:35,  1.93it/s]

buffer size = 7388, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3495/2000001 [33:12<288:23:02,  1.92it/s]

buffer size = 7390, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3496/2000001 [33:13<291:14:14,  1.90it/s]

buffer size = 7392, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3497/2000001 [33:13<288:26:49,  1.92it/s]

buffer size = 7394, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3498/2000001 [33:14<329:34:53,  1.68it/s]

buffer size = 7396, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3499/2000001 [33:15<360:59:52,  1.54it/s]

buffer size = 7398, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3500/2000001 [33:15<388:04:50,  1.43it/s]

buffer size = 7400, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3501/2000001 [33:16<380:33:05,  1.46it/s]

buffer size = 7402, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3502/2000001 [33:17<356:58:39,  1.55it/s]

buffer size = 7404, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3503/2000001 [33:17<337:42:14,  1.64it/s]

buffer size = 7406, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3504/2000001 [33:18<327:02:06,  1.70it/s]

buffer size = 7408, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3505/2000001 [33:18<315:01:35,  1.76it/s]

buffer size = 7410, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3506/2000001 [33:19<311:25:19,  1.78it/s]

buffer size = 7412, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3507/2000001 [33:19<309:51:39,  1.79it/s]

buffer size = 7414, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3508/2000001 [33:20<311:53:18,  1.78it/s]

buffer size = 7416, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3509/2000001 [33:20<304:46:32,  1.82it/s]

buffer size = 7418, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3510/2000001 [33:21<301:22:35,  1.84it/s]

buffer size = 7420, epsilon = 0.09825
mean_reward :  0.0


  0%|          | 3511/2000001 [33:21<297:57:15,  1.86it/s]

buffer size = 7422, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3512/2000001 [33:22<295:59:23,  1.87it/s]

buffer size = 7424, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3513/2000001 [33:22<293:14:21,  1.89it/s]

buffer size = 7426, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3514/2000001 [33:23<292:04:19,  1.90it/s]

buffer size = 7428, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3515/2000001 [33:24<287:27:07,  1.93it/s]

buffer size = 7430, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3516/2000001 [33:24<290:51:32,  1.91it/s]

buffer size = 7432, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3517/2000001 [33:25<292:13:39,  1.90it/s]

buffer size = 7434, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3518/2000001 [33:25<292:35:40,  1.90it/s]

buffer size = 7436, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3519/2000001 [33:26<289:40:44,  1.91it/s]

buffer size = 7438, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3520/2000001 [33:26<327:28:02,  1.69it/s]

buffer size = 7440, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3521/2000001 [33:27<367:13:19,  1.51it/s]

buffer size = 7442, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3522/2000001 [33:28<399:24:03,  1.39it/s]

buffer size = 7444, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3523/2000001 [33:29<378:57:26,  1.46it/s]

buffer size = 7446, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3524/2000001 [33:29<355:27:09,  1.56it/s]

buffer size = 7448, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3525/2000001 [33:30<336:09:52,  1.65it/s]

buffer size = 7450, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3526/2000001 [33:30<326:22:37,  1.70it/s]

buffer size = 7452, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3527/2000001 [33:31<316:17:55,  1.75it/s]

buffer size = 7454, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3528/2000001 [33:31<307:50:14,  1.80it/s]

buffer size = 7456, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3529/2000001 [33:32<303:10:49,  1.83it/s]

buffer size = 7458, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3530/2000001 [33:32<299:49:40,  1.85it/s]

buffer size = 7460, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3531/2000001 [33:33<295:08:37,  1.88it/s]

buffer size = 7462, epsilon = 0.09824
mean_reward :  0.0


  0%|          | 3532/2000001 [33:33<296:05:36,  1.87it/s]

buffer size = 7464, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3533/2000001 [33:34<294:16:42,  1.88it/s]

buffer size = 7466, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3534/2000001 [33:35<299:27:04,  1.85it/s]

buffer size = 7468, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3535/2000001 [33:35<297:26:12,  1.86it/s]

buffer size = 7470, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3536/2000001 [33:36<296:29:43,  1.87it/s]

buffer size = 7472, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3537/2000001 [33:36<297:14:02,  1.87it/s]

buffer size = 7474, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3538/2000001 [33:37<298:00:03,  1.86it/s]

buffer size = 7476, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3539/2000001 [33:37<296:00:21,  1.87it/s]

buffer size = 7478, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3540/2000001 [33:38<299:33:52,  1.85it/s]

buffer size = 7480, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3541/2000001 [33:38<297:27:56,  1.86it/s]

buffer size = 7482, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3542/2000001 [33:39<347:38:47,  1.60it/s]

buffer size = 7484, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3543/2000001 [33:40<378:53:11,  1.46it/s]

buffer size = 7486, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3544/2000001 [33:41<398:48:53,  1.39it/s]

buffer size = 7488, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3545/2000001 [33:41<383:37:22,  1.45it/s]

buffer size = 7490, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3546/2000001 [33:42<357:49:49,  1.55it/s]

buffer size = 7492, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3547/2000001 [33:42<337:59:10,  1.64it/s]

buffer size = 7494, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3548/2000001 [33:43<325:35:34,  1.70it/s]

buffer size = 7496, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3549/2000001 [33:43<315:48:59,  1.76it/s]

buffer size = 7498, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3550/2000001 [33:44<308:28:24,  1.80it/s]

buffer size = 7500, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3551/2000001 [33:45<302:51:30,  1.83it/s]

buffer size = 7502, epsilon = 0.09823
mean_reward :  0.0


  0%|          | 3552/2000001 [33:45<301:53:26,  1.84it/s]

buffer size = 7504, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3553/2000001 [33:46<296:31:46,  1.87it/s]

buffer size = 7506, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3554/2000001 [33:46<296:02:35,  1.87it/s]

buffer size = 7508, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3555/2000001 [33:47<291:31:11,  1.90it/s]

buffer size = 7510, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3556/2000001 [33:47<295:43:47,  1.88it/s]

buffer size = 7512, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3557/2000001 [33:48<294:58:31,  1.88it/s]

buffer size = 7514, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3558/2000001 [33:48<295:52:42,  1.87it/s]

buffer size = 7516, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3559/2000001 [33:49<294:56:32,  1.88it/s]

buffer size = 7518, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3560/2000001 [33:49<297:43:11,  1.86it/s]

buffer size = 7520, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3561/2000001 [33:50<297:24:58,  1.86it/s]

buffer size = 7522, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3562/2000001 [33:50<295:47:45,  1.87it/s]

buffer size = 7524, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3563/2000001 [33:51<294:21:38,  1.88it/s]

buffer size = 7526, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3564/2000001 [33:52<334:59:28,  1.66it/s]

buffer size = 7528, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3565/2000001 [33:52<362:51:01,  1.53it/s]

buffer size = 7530, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3566/2000001 [33:53<379:00:41,  1.46it/s]

buffer size = 7532, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3567/2000001 [33:54<381:32:09,  1.45it/s]

buffer size = 7534, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3568/2000001 [33:54<356:14:10,  1.56it/s]

buffer size = 7536, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3569/2000001 [33:55<337:26:07,  1.64it/s]

buffer size = 7538, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3570/2000001 [33:55<326:41:12,  1.70it/s]

buffer size = 7540, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3571/2000001 [33:56<315:45:31,  1.76it/s]

buffer size = 7542, epsilon = 0.09822
mean_reward :  0.0


  0%|          | 3572/2000001 [33:57<309:59:49,  1.79it/s]

buffer size = 7544, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3573/2000001 [33:57<304:38:51,  1.82it/s]

buffer size = 7546, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3574/2000001 [33:58<304:47:59,  1.82it/s]

buffer size = 7548, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3575/2000001 [33:58<300:57:35,  1.84it/s]

buffer size = 7550, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3576/2000001 [33:59<306:09:16,  1.81it/s]

buffer size = 7552, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3577/2000001 [33:59<303:47:22,  1.83it/s]

buffer size = 7554, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3578/2000001 [34:00<305:26:24,  1.82it/s]

buffer size = 7556, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3579/2000001 [34:00<301:35:54,  1.84it/s]

buffer size = 7558, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3580/2000001 [34:01<300:49:29,  1.84it/s]

buffer size = 7560, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3581/2000001 [34:01<299:42:11,  1.85it/s]

buffer size = 7562, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3582/2000001 [34:02<301:10:39,  1.84it/s]

buffer size = 7564, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3583/2000001 [34:03<300:17:35,  1.85it/s]

buffer size = 7566, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3584/2000001 [34:03<302:07:45,  1.84it/s]

buffer size = 7568, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3585/2000001 [34:04<301:21:51,  1.84it/s]

buffer size = 7570, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3586/2000001 [34:04<345:53:00,  1.60it/s]

buffer size = 7572, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3587/2000001 [34:05<378:51:38,  1.46it/s]

buffer size = 7574, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3588/2000001 [34:06<399:47:16,  1.39it/s]

buffer size = 7576, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3589/2000001 [34:07<384:00:21,  1.44it/s]

buffer size = 7578, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3590/2000001 [34:07<357:18:39,  1.55it/s]

buffer size = 7580, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3591/2000001 [34:08<336:04:42,  1.65it/s]

buffer size = 7582, epsilon = 0.09821
mean_reward :  0.0


  0%|          | 3592/2000001 [34:08<323:09:52,  1.72it/s]

buffer size = 7584, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3593/2000001 [34:09<311:52:28,  1.78it/s]

buffer size = 7586, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3594/2000001 [34:09<305:54:37,  1.81it/s]

buffer size = 7588, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3595/2000001 [34:10<301:01:12,  1.84it/s]

buffer size = 7590, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3596/2000001 [34:10<296:25:54,  1.87it/s]

buffer size = 7592, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3597/2000001 [34:11<292:45:13,  1.89it/s]

buffer size = 7594, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3598/2000001 [34:11<291:35:40,  1.90it/s]

buffer size = 7596, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3599/2000001 [34:12<289:40:08,  1.91it/s]

buffer size = 7598, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3600/2000001 [34:12<294:38:17,  1.88it/s]

buffer size = 7600, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3601/2000001 [34:13<296:19:52,  1.87it/s]

buffer size = 7602, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3602/2000001 [34:13<294:20:40,  1.88it/s]

buffer size = 7604, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3603/2000001 [34:14<291:33:13,  1.90it/s]

buffer size = 7606, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3604/2000001 [34:15<294:48:48,  1.88it/s]

buffer size = 7608, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3605/2000001 [34:15<295:51:27,  1.87it/s]

buffer size = 7610, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3606/2000001 [34:16<294:28:25,  1.88it/s]

buffer size = 7612, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3607/2000001 [34:16<292:31:14,  1.90it/s]

buffer size = 7614, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3608/2000001 [34:17<319:58:26,  1.73it/s]

buffer size = 7616, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3609/2000001 [34:18<366:44:46,  1.51it/s]

buffer size = 7618, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3610/2000001 [34:19<394:53:07,  1.40it/s]

buffer size = 7620, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3611/2000001 [34:19<381:41:31,  1.45it/s]

buffer size = 7622, epsilon = 0.09820
mean_reward :  0.0


  0%|          | 3612/2000001 [34:20<359:50:52,  1.54it/s]

buffer size = 7624, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3613/2000001 [34:20<335:56:28,  1.65it/s]

buffer size = 7626, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3614/2000001 [34:21<321:56:20,  1.72it/s]

buffer size = 7628, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3615/2000001 [34:21<312:59:10,  1.77it/s]

buffer size = 7630, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3616/2000001 [34:22<310:13:17,  1.79it/s]

buffer size = 7632, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3617/2000001 [34:22<304:48:58,  1.82it/s]

buffer size = 7634, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3618/2000001 [34:23<304:34:21,  1.82it/s]

buffer size = 7636, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3619/2000001 [34:23<302:14:39,  1.83it/s]

buffer size = 7638, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3620/2000001 [34:24<299:39:07,  1.85it/s]

buffer size = 7640, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3621/2000001 [34:24<299:16:01,  1.85it/s]

buffer size = 7642, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3622/2000001 [34:25<301:32:53,  1.84it/s]

buffer size = 7644, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3623/2000001 [34:26<304:07:42,  1.82it/s]

buffer size = 7646, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3624/2000001 [34:26<302:37:02,  1.83it/s]

buffer size = 7648, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3625/2000001 [34:27<299:38:15,  1.85it/s]

buffer size = 7650, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3626/2000001 [34:27<299:39:11,  1.85it/s]

buffer size = 7652, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3627/2000001 [34:28<298:36:18,  1.86it/s]

buffer size = 7654, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3628/2000001 [34:28<297:57:40,  1.86it/s]

buffer size = 7656, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3629/2000001 [34:29<298:45:35,  1.86it/s]

buffer size = 7658, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3630/2000001 [34:30<345:43:16,  1.60it/s]

buffer size = 7660, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3631/2000001 [34:31<392:38:21,  1.41it/s]

buffer size = 7662, epsilon = 0.09819
mean_reward :  0.0


  0%|          | 3632/2000001 [34:31<411:59:05,  1.35it/s]

buffer size = 7664, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3633/2000001 [34:32<378:41:00,  1.46it/s]

buffer size = 7666, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3634/2000001 [34:32<356:00:18,  1.56it/s]

buffer size = 7668, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3635/2000001 [34:33<338:13:45,  1.64it/s]

buffer size = 7670, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3636/2000001 [34:34<327:47:49,  1.69it/s]

buffer size = 7672, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3637/2000001 [34:34<315:05:10,  1.76it/s]

buffer size = 7674, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3638/2000001 [34:35<309:07:38,  1.79it/s]

buffer size = 7676, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3639/2000001 [34:35<303:10:24,  1.83it/s]

buffer size = 7678, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3640/2000001 [34:36<299:30:05,  1.85it/s]

buffer size = 7680, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3641/2000001 [34:36<299:55:20,  1.85it/s]

buffer size = 7682, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3642/2000001 [34:37<299:48:37,  1.85it/s]

buffer size = 7684, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3643/2000001 [34:37<298:59:29,  1.85it/s]

buffer size = 7686, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3644/2000001 [34:38<301:29:30,  1.84it/s]

buffer size = 7688, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3645/2000001 [34:38<298:01:02,  1.86it/s]

buffer size = 7690, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3646/2000001 [34:39<300:15:44,  1.85it/s]

buffer size = 7692, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3647/2000001 [34:39<297:25:55,  1.86it/s]

buffer size = 7694, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3648/2000001 [34:40<297:19:13,  1.87it/s]

buffer size = 7696, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3649/2000001 [34:40<295:51:04,  1.87it/s]

buffer size = 7698, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3650/2000001 [34:41<299:32:21,  1.85it/s]

buffer size = 7700, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3651/2000001 [34:42<315:25:32,  1.76it/s]

buffer size = 7702, epsilon = 0.09818
mean_reward :  0.0


  0%|          | 3652/2000001 [34:43<361:50:52,  1.53it/s]

buffer size = 7704, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3653/2000001 [34:43<393:02:33,  1.41it/s]

buffer size = 7706, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3654/2000001 [34:44<407:08:07,  1.36it/s]

buffer size = 7708, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3655/2000001 [34:45<371:07:10,  1.49it/s]

buffer size = 7710, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3656/2000001 [34:45<350:39:05,  1.58it/s]

buffer size = 7712, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3657/2000001 [34:46<336:48:25,  1.65it/s]

buffer size = 7714, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3658/2000001 [34:46<323:04:16,  1.72it/s]

buffer size = 7716, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3659/2000001 [34:47<313:50:08,  1.77it/s]

buffer size = 7718, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3660/2000001 [34:47<310:13:11,  1.79it/s]

buffer size = 7720, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3661/2000001 [34:48<308:35:13,  1.80it/s]

buffer size = 7722, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3662/2000001 [34:48<304:55:05,  1.82it/s]

buffer size = 7724, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3663/2000001 [34:49<303:26:51,  1.83it/s]

buffer size = 7726, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3664/2000001 [34:50<303:59:08,  1.82it/s]

buffer size = 7728, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3665/2000001 [34:50<305:08:41,  1.82it/s]

buffer size = 7730, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3666/2000001 [34:51<299:36:22,  1.85it/s]

buffer size = 7732, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3667/2000001 [34:51<300:10:04,  1.85it/s]

buffer size = 7734, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3668/2000001 [34:52<296:49:22,  1.87it/s]

buffer size = 7736, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3669/2000001 [34:52<297:49:31,  1.86it/s]

buffer size = 7738, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3670/2000001 [34:53<295:54:44,  1.87it/s]

buffer size = 7740, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3671/2000001 [34:53<295:26:21,  1.88it/s]

buffer size = 7742, epsilon = 0.09817
mean_reward :  0.0


  0%|          | 3672/2000001 [34:54<292:54:59,  1.89it/s]

buffer size = 7744, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3673/2000001 [34:55<331:08:18,  1.67it/s]

buffer size = 7746, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3674/2000001 [34:55<362:01:17,  1.53it/s]

buffer size = 7748, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3675/2000001 [34:56<384:00:05,  1.44it/s]

buffer size = 7750, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3676/2000001 [34:57<387:46:12,  1.43it/s]

buffer size = 7752, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3677/2000001 [34:57<363:16:39,  1.53it/s]

buffer size = 7754, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3678/2000001 [34:58<343:15:59,  1.62it/s]

buffer size = 7756, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3679/2000001 [34:58<331:24:52,  1.67it/s]

buffer size = 7758, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3680/2000001 [34:59<323:01:28,  1.72it/s]

buffer size = 7760, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3681/2000001 [35:00<317:51:07,  1.74it/s]

buffer size = 7762, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3682/2000001 [35:00<312:36:20,  1.77it/s]

buffer size = 7764, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3683/2000001 [35:01<310:02:03,  1.79it/s]

buffer size = 7766, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3684/2000001 [35:01<305:13:47,  1.82it/s]

buffer size = 7768, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3685/2000001 [35:02<303:04:15,  1.83it/s]

buffer size = 7770, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3686/2000001 [35:02<301:53:02,  1.84it/s]

buffer size = 7772, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3687/2000001 [35:03<303:24:38,  1.83it/s]

buffer size = 7774, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3688/2000001 [35:03<301:35:57,  1.84it/s]

buffer size = 7776, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3689/2000001 [35:04<300:09:36,  1.85it/s]

buffer size = 7778, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3690/2000001 [35:04<296:50:54,  1.87it/s]

buffer size = 7780, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3691/2000001 [35:05<297:06:15,  1.87it/s]

buffer size = 7782, epsilon = 0.09816
mean_reward :  0.0


  0%|          | 3692/2000001 [35:05<297:52:59,  1.86it/s]

buffer size = 7784, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3693/2000001 [35:06<301:36:11,  1.84it/s]

buffer size = 7786, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3694/2000001 [35:07<299:44:54,  1.85it/s]

buffer size = 7788, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3695/2000001 [35:07<342:14:29,  1.62it/s]

buffer size = 7790, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3696/2000001 [35:08<374:52:24,  1.48it/s]

buffer size = 7792, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3697/2000001 [35:09<400:08:00,  1.39it/s]

buffer size = 7794, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3698/2000001 [35:10<389:04:44,  1.43it/s]

buffer size = 7796, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3699/2000001 [35:10<363:09:02,  1.53it/s]

buffer size = 7798, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3700/2000001 [35:11<343:43:19,  1.61it/s]

buffer size = 7800, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3701/2000001 [35:11<330:10:05,  1.68it/s]

buffer size = 7802, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3702/2000001 [35:12<320:40:54,  1.73it/s]

buffer size = 7804, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3703/2000001 [35:12<316:41:16,  1.75it/s]

buffer size = 7806, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3704/2000001 [35:13<311:09:50,  1.78it/s]

buffer size = 7808, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3705/2000001 [35:13<309:05:57,  1.79it/s]

buffer size = 7810, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3706/2000001 [35:14<307:28:51,  1.80it/s]

buffer size = 7812, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3707/2000001 [35:15<306:00:58,  1.81it/s]

buffer size = 7814, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3708/2000001 [35:15<302:44:13,  1.83it/s]

buffer size = 7816, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3709/2000001 [35:16<302:47:56,  1.83it/s]

buffer size = 7818, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3710/2000001 [35:16<302:23:58,  1.83it/s]

buffer size = 7820, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3711/2000001 [35:17<300:17:08,  1.85it/s]

buffer size = 7822, epsilon = 0.09815
mean_reward :  0.0


  0%|          | 3712/2000001 [35:17<301:35:40,  1.84it/s]

buffer size = 7824, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3713/2000001 [35:18<301:41:39,  1.84it/s]

buffer size = 7826, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3714/2000001 [35:18<301:51:25,  1.84it/s]

buffer size = 7828, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3715/2000001 [35:19<303:02:41,  1.83it/s]

buffer size = 7830, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3716/2000001 [35:19<306:58:24,  1.81it/s]

buffer size = 7832, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3717/2000001 [35:20<353:51:16,  1.57it/s]

buffer size = 7834, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3718/2000001 [35:21<390:25:00,  1.42it/s]

buffer size = 7836, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3719/2000001 [35:22<413:54:50,  1.34it/s]

buffer size = 7838, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3720/2000001 [35:23<379:52:30,  1.46it/s]

buffer size = 7840, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3721/2000001 [35:23<353:23:07,  1.57it/s]

buffer size = 7842, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3722/2000001 [35:24<337:56:31,  1.64it/s]

buffer size = 7844, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3723/2000001 [35:24<329:06:52,  1.68it/s]

buffer size = 7846, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3724/2000001 [35:25<319:37:29,  1.73it/s]

buffer size = 7848, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3725/2000001 [35:25<314:51:19,  1.76it/s]

buffer size = 7850, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3726/2000001 [35:26<311:20:10,  1.78it/s]

buffer size = 7852, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3727/2000001 [35:26<307:03:08,  1.81it/s]

buffer size = 7854, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3728/2000001 [35:27<305:44:49,  1.81it/s]

buffer size = 7856, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3729/2000001 [35:27<303:33:55,  1.83it/s]

buffer size = 7858, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3730/2000001 [35:28<303:36:25,  1.83it/s]

buffer size = 7860, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3731/2000001 [35:28<300:55:38,  1.84it/s]

buffer size = 7862, epsilon = 0.09814
mean_reward :  0.0


  0%|          | 3732/2000001 [35:29<304:00:18,  1.82it/s]

buffer size = 7864, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3733/2000001 [35:30<301:18:54,  1.84it/s]

buffer size = 7866, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3734/2000001 [35:30<301:27:20,  1.84it/s]

buffer size = 7868, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3735/2000001 [35:31<298:19:38,  1.86it/s]

buffer size = 7870, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3736/2000001 [35:31<299:29:01,  1.85it/s]

buffer size = 7872, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3737/2000001 [35:32<299:42:04,  1.85it/s]

buffer size = 7874, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3738/2000001 [35:33<337:03:09,  1.65it/s]

buffer size = 7876, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3739/2000001 [35:33<372:31:52,  1.49it/s]

buffer size = 7878, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3740/2000001 [35:34<408:44:39,  1.36it/s]

buffer size = 7880, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3741/2000001 [35:35<391:01:00,  1.42it/s]

buffer size = 7882, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3742/2000001 [35:35<365:16:18,  1.52it/s]

buffer size = 7884, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3743/2000001 [35:36<344:26:13,  1.61it/s]

buffer size = 7886, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3744/2000001 [35:36<331:20:40,  1.67it/s]

buffer size = 7888, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3745/2000001 [35:37<324:03:15,  1.71it/s]

buffer size = 7890, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3746/2000001 [35:38<317:01:49,  1.75it/s]

buffer size = 7892, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3747/2000001 [35:38<312:29:09,  1.77it/s]

buffer size = 7894, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3748/2000001 [35:39<309:50:05,  1.79it/s]

buffer size = 7896, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3749/2000001 [35:39<305:52:19,  1.81it/s]

buffer size = 7898, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3750/2000001 [35:40<305:04:48,  1.82it/s]

buffer size = 7900, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3751/2000001 [35:40<300:29:57,  1.85it/s]

buffer size = 7902, epsilon = 0.09813
mean_reward :  0.0


  0%|          | 3752/2000001 [35:41<300:13:34,  1.85it/s]

buffer size = 7904, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3753/2000001 [35:41<298:51:12,  1.86it/s]

buffer size = 7906, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3754/2000001 [35:42<298:51:19,  1.86it/s]

buffer size = 7908, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3755/2000001 [35:42<297:22:45,  1.86it/s]

buffer size = 7910, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3756/2000001 [35:43<298:03:56,  1.86it/s]

buffer size = 7912, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3757/2000001 [35:43<296:18:12,  1.87it/s]

buffer size = 7914, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3758/2000001 [35:44<296:31:29,  1.87it/s]

buffer size = 7916, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3759/2000001 [35:45<295:38:04,  1.88it/s]

buffer size = 7918, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3760/2000001 [35:45<338:07:47,  1.64it/s]

buffer size = 7920, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3761/2000001 [35:46<373:53:50,  1.48it/s]

buffer size = 7922, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3762/2000001 [35:47<401:09:42,  1.38it/s]

buffer size = 7924, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3763/2000001 [35:48<386:35:44,  1.43it/s]

buffer size = 7926, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3764/2000001 [35:48<358:50:30,  1.55it/s]

buffer size = 7928, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3765/2000001 [35:49<339:53:30,  1.63it/s]

buffer size = 7930, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3766/2000001 [35:49<330:00:44,  1.68it/s]

buffer size = 7932, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3767/2000001 [35:50<317:59:04,  1.74it/s]

buffer size = 7934, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3768/2000001 [35:50<314:59:31,  1.76it/s]

buffer size = 7936, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3769/2000001 [35:51<309:56:56,  1.79it/s]

buffer size = 7938, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3770/2000001 [35:51<308:46:37,  1.80it/s]

buffer size = 7940, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3771/2000001 [35:52<305:12:24,  1.82it/s]

buffer size = 7942, epsilon = 0.09812
mean_reward :  0.0


  0%|          | 3772/2000001 [35:53<305:41:02,  1.81it/s]

buffer size = 7944, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3773/2000001 [35:53<307:45:12,  1.80it/s]

buffer size = 7946, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3774/2000001 [35:54<305:16:03,  1.82it/s]

buffer size = 7948, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3775/2000001 [35:54<306:28:39,  1.81it/s]

buffer size = 7950, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3776/2000001 [35:55<306:22:36,  1.81it/s]

buffer size = 7952, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3777/2000001 [35:55<304:36:37,  1.82it/s]

buffer size = 7954, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3778/2000001 [35:56<301:11:58,  1.84it/s]

buffer size = 7956, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3779/2000001 [35:56<300:00:39,  1.85it/s]

buffer size = 7958, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3780/2000001 [35:57<297:33:02,  1.86it/s]

buffer size = 7960, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3781/2000001 [35:57<306:36:24,  1.81it/s]

buffer size = 7962, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3782/2000001 [35:58<359:07:50,  1.54it/s]

buffer size = 7964, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3783/2000001 [35:59<396:54:47,  1.40it/s]

buffer size = 7966, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3784/2000001 [36:00<398:04:44,  1.39it/s]

buffer size = 7968, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3785/2000001 [36:00<368:02:17,  1.51it/s]

buffer size = 7970, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3786/2000001 [36:01<347:19:21,  1.60it/s]

buffer size = 7972, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3787/2000001 [36:02<333:14:36,  1.66it/s]

buffer size = 7974, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3788/2000001 [36:02<324:52:04,  1.71it/s]

buffer size = 7976, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3789/2000001 [36:03<317:16:14,  1.75it/s]

buffer size = 7978, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3790/2000001 [36:03<309:50:28,  1.79it/s]

buffer size = 7980, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3791/2000001 [36:04<307:22:03,  1.80it/s]

buffer size = 7982, epsilon = 0.09811
mean_reward :  0.0


  0%|          | 3792/2000001 [36:04<303:35:31,  1.83it/s]

buffer size = 7984, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3793/2000001 [36:05<306:54:59,  1.81it/s]

buffer size = 7986, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3794/2000001 [36:05<302:43:29,  1.83it/s]

buffer size = 7988, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3795/2000001 [36:06<303:20:06,  1.83it/s]

buffer size = 7990, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3796/2000001 [36:06<301:15:49,  1.84it/s]

buffer size = 7992, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3797/2000001 [36:07<300:59:17,  1.84it/s]

buffer size = 7994, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3798/2000001 [36:07<295:57:23,  1.87it/s]

buffer size = 7996, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3799/2000001 [36:08<298:45:37,  1.86it/s]

buffer size = 7998, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3800/2000001 [36:09<296:06:01,  1.87it/s]

buffer size = 8000, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3801/2000001 [36:09<299:01:05,  1.85it/s]

buffer size = 8002, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3802/2000001 [36:10<297:49:20,  1.86it/s]

buffer size = 8004, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3803/2000001 [36:10<342:47:40,  1.62it/s]

buffer size = 8006, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3804/2000001 [36:11<374:40:23,  1.48it/s]

buffer size = 8008, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3805/2000001 [36:12<407:33:27,  1.36it/s]

buffer size = 8010, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3806/2000001 [36:13<383:42:18,  1.45it/s]

buffer size = 8012, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3807/2000001 [36:13<359:51:07,  1.54it/s]

buffer size = 8014, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3808/2000001 [36:14<339:36:49,  1.63it/s]

buffer size = 8016, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3809/2000001 [36:14<328:10:47,  1.69it/s]

buffer size = 8018, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3810/2000001 [36:15<318:00:01,  1.74it/s]

buffer size = 8020, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3811/2000001 [36:15<313:54:04,  1.77it/s]

buffer size = 8022, epsilon = 0.09810
mean_reward :  0.0


  0%|          | 3812/2000001 [36:16<308:26:07,  1.80it/s]

buffer size = 8024, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3813/2000001 [36:16<305:43:08,  1.81it/s]

buffer size = 8026, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3814/2000001 [36:17<305:16:10,  1.82it/s]

buffer size = 8028, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3815/2000001 [36:18<305:22:38,  1.82it/s]

buffer size = 8030, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3816/2000001 [36:18<303:17:23,  1.83it/s]

buffer size = 8032, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3817/2000001 [36:19<302:18:26,  1.83it/s]

buffer size = 8034, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3818/2000001 [36:19<302:02:15,  1.84it/s]

buffer size = 8036, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3819/2000001 [36:20<304:23:54,  1.82it/s]

buffer size = 8038, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3820/2000001 [36:20<305:23:08,  1.82it/s]

buffer size = 8040, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3821/2000001 [36:21<304:30:02,  1.82it/s]

buffer size = 8042, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3822/2000001 [36:21<304:28:38,  1.82it/s]

buffer size = 8044, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3823/2000001 [36:22<301:11:19,  1.84it/s]

buffer size = 8046, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3824/2000001 [36:23<315:05:52,  1.76it/s]

buffer size = 8048, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3825/2000001 [36:23<356:58:32,  1.55it/s]

buffer size = 8050, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3826/2000001 [36:24<388:37:52,  1.43it/s]

buffer size = 8052, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3827/2000001 [36:25<408:35:25,  1.36it/s]

buffer size = 8054, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3828/2000001 [36:26<374:00:21,  1.48it/s]

buffer size = 8056, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3829/2000001 [36:26<351:45:17,  1.58it/s]

buffer size = 8058, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3830/2000001 [36:27<338:38:02,  1.64it/s]

buffer size = 8060, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3831/2000001 [36:27<326:55:21,  1.70it/s]

buffer size = 8062, epsilon = 0.09809
mean_reward :  0.0


  0%|          | 3832/2000001 [36:28<320:24:39,  1.73it/s]

buffer size = 8064, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3833/2000001 [36:28<312:25:38,  1.77it/s]

buffer size = 8066, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3834/2000001 [36:29<310:04:39,  1.79it/s]

buffer size = 8068, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3835/2000001 [36:29<306:42:08,  1.81it/s]

buffer size = 8070, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3836/2000001 [36:30<307:10:31,  1.81it/s]

buffer size = 8072, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3837/2000001 [36:30<302:24:17,  1.83it/s]

buffer size = 8074, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3838/2000001 [36:31<304:58:56,  1.82it/s]

buffer size = 8076, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3839/2000001 [36:32<302:58:19,  1.83it/s]

buffer size = 8078, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3840/2000001 [36:32<306:47:26,  1.81it/s]

buffer size = 8080, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3841/2000001 [36:33<302:19:08,  1.83it/s]

buffer size = 8082, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3842/2000001 [36:33<301:04:35,  1.84it/s]

buffer size = 8084, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3843/2000001 [36:34<299:12:20,  1.85it/s]

buffer size = 8086, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3844/2000001 [36:34<301:15:53,  1.84it/s]

buffer size = 8088, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3845/2000001 [36:35<300:17:58,  1.85it/s]

buffer size = 8090, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3846/2000001 [36:36<340:42:02,  1.63it/s]

buffer size = 8092, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3847/2000001 [36:36<379:01:39,  1.46it/s]

buffer size = 8094, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3848/2000001 [36:37<410:23:14,  1.35it/s]

buffer size = 8096, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3849/2000001 [36:38<387:17:46,  1.43it/s]

buffer size = 8098, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3850/2000001 [36:38<362:50:22,  1.53it/s]

buffer size = 8100, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3851/2000001 [36:39<344:30:22,  1.61it/s]

buffer size = 8102, epsilon = 0.09808
mean_reward :  0.0


  0%|          | 3852/2000001 [36:40<331:32:23,  1.67it/s]

buffer size = 8104, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3853/2000001 [36:40<322:08:29,  1.72it/s]

buffer size = 8106, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3854/2000001 [36:41<317:19:41,  1.75it/s]

buffer size = 8108, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3855/2000001 [36:41<322:56:57,  1.72it/s]

buffer size = 8110, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3856/2000001 [36:42<318:54:55,  1.74it/s]

buffer size = 8112, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3857/2000001 [36:42<313:51:42,  1.77it/s]

buffer size = 8114, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3858/2000001 [36:43<310:21:02,  1.79it/s]

buffer size = 8116, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3859/2000001 [36:43<306:58:40,  1.81it/s]

buffer size = 8118, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3860/2000001 [36:44<303:43:21,  1.83it/s]

buffer size = 8120, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3861/2000001 [36:45<304:26:25,  1.82it/s]

buffer size = 8122, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3862/2000001 [36:45<301:42:45,  1.84it/s]

buffer size = 8124, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3863/2000001 [36:46<304:51:23,  1.82it/s]

buffer size = 8126, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3864/2000001 [36:46<301:13:11,  1.84it/s]

buffer size = 8128, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3865/2000001 [36:47<302:43:23,  1.83it/s]

buffer size = 8130, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3866/2000001 [36:47<301:17:49,  1.84it/s]

buffer size = 8132, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3867/2000001 [36:48<336:54:19,  1.65it/s]

buffer size = 8134, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3868/2000001 [36:49<368:39:19,  1.50it/s]

buffer size = 8136, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3869/2000001 [36:50<390:07:37,  1.42it/s]

buffer size = 8138, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3870/2000001 [36:50<397:39:05,  1.39it/s]

buffer size = 8140, epsilon = 0.09807
mean_reward :  0.0


  0%|          | 3871/2000001 [36:51<369:20:03,  1.50it/s]

buffer size = 8142, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3872/2000001 [36:51<348:22:04,  1.59it/s]

buffer size = 8144, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3873/2000001 [36:52<338:10:02,  1.64it/s]

buffer size = 8146, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3874/2000001 [36:53<324:50:51,  1.71it/s]

buffer size = 8148, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3875/2000001 [36:53<319:23:33,  1.74it/s]

buffer size = 8150, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3876/2000001 [36:54<312:24:57,  1.77it/s]

buffer size = 8152, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3877/2000001 [36:54<310:55:44,  1.78it/s]

buffer size = 8154, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3878/2000001 [36:55<309:31:15,  1.79it/s]

buffer size = 8156, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3879/2000001 [36:55<306:52:21,  1.81it/s]

buffer size = 8158, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3880/2000001 [36:56<305:26:08,  1.82it/s]

buffer size = 8160, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3881/2000001 [36:56<302:53:24,  1.83it/s]

buffer size = 8162, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3882/2000001 [36:57<301:48:01,  1.84it/s]

buffer size = 8164, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3883/2000001 [36:57<303:49:33,  1.82it/s]

buffer size = 8166, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3884/2000001 [36:58<302:53:39,  1.83it/s]

buffer size = 8168, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3885/2000001 [36:59<307:10:32,  1.81it/s]

buffer size = 8170, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3886/2000001 [36:59<307:43:44,  1.80it/s]

buffer size = 8172, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3887/2000001 [37:00<304:58:12,  1.82it/s]

buffer size = 8174, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3888/2000001 [37:00<306:08:42,  1.81it/s]

buffer size = 8176, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3889/2000001 [37:01<343:08:36,  1.62it/s]

buffer size = 8178, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3890/2000001 [37:02<373:23:58,  1.48it/s]

buffer size = 8180, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3891/2000001 [37:03<395:58:34,  1.40it/s]

buffer size = 8182, epsilon = 0.09806
mean_reward :  0.0


  0%|          | 3892/2000001 [37:03<398:07:15,  1.39it/s]

buffer size = 8184, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3893/2000001 [37:04<369:02:39,  1.50it/s]

buffer size = 8186, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3894/2000001 [37:04<352:45:34,  1.57it/s]

buffer size = 8188, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3895/2000001 [37:05<338:00:43,  1.64it/s]

buffer size = 8190, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3896/2000001 [37:06<328:08:53,  1.69it/s]

buffer size = 8192, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3897/2000001 [37:06<319:54:46,  1.73it/s]

buffer size = 8194, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3898/2000001 [37:07<319:02:31,  1.74it/s]

buffer size = 8196, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3899/2000001 [37:07<313:35:31,  1.77it/s]

buffer size = 8198, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3900/2000001 [37:08<310:19:49,  1.79it/s]

buffer size = 8200, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3901/2000001 [37:08<307:08:30,  1.81it/s]

buffer size = 8202, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3902/2000001 [37:09<303:49:53,  1.82it/s]

buffer size = 8204, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3903/2000001 [37:09<301:05:23,  1.84it/s]

buffer size = 8206, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3904/2000001 [37:10<302:54:20,  1.83it/s]

buffer size = 8208, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3905/2000001 [37:10<299:35:13,  1.85it/s]

buffer size = 8210, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3906/2000001 [37:11<303:26:23,  1.83it/s]

buffer size = 8212, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3907/2000001 [37:11<301:10:30,  1.84it/s]

buffer size = 8214, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3908/2000001 [37:12<302:44:25,  1.83it/s]

buffer size = 8216, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3909/2000001 [37:13<299:44:42,  1.85it/s]

buffer size = 8218, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3910/2000001 [37:13<303:43:18,  1.83it/s]

buffer size = 8220, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3911/2000001 [37:14<353:19:52,  1.57it/s]

buffer size = 8222, epsilon = 0.09805
mean_reward :  0.0


  0%|          | 3912/2000001 [37:15<385:21:18,  1.44it/s]

buffer size = 8224, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3913/2000001 [37:16<416:31:54,  1.33it/s]

buffer size = 8226, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3914/2000001 [37:16<382:46:54,  1.45it/s]

buffer size = 8228, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3915/2000001 [37:17<355:26:48,  1.56it/s]

buffer size = 8230, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3916/2000001 [37:17<341:15:26,  1.62it/s]

buffer size = 8232, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3917/2000001 [37:18<325:57:42,  1.70it/s]

buffer size = 8234, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3918/2000001 [37:18<319:06:28,  1.74it/s]

buffer size = 8236, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3919/2000001 [37:19<313:04:04,  1.77it/s]

buffer size = 8238, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3920/2000001 [37:19<311:52:09,  1.78it/s]

buffer size = 8240, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3921/2000001 [37:20<305:46:41,  1.81it/s]

buffer size = 8242, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3922/2000001 [37:21<308:10:02,  1.80it/s]

buffer size = 8244, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3923/2000001 [37:21<306:32:21,  1.81it/s]

buffer size = 8246, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3924/2000001 [37:22<304:48:34,  1.82it/s]

buffer size = 8248, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3925/2000001 [37:22<301:50:40,  1.84it/s]

buffer size = 8250, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3926/2000001 [37:23<300:32:51,  1.84it/s]

buffer size = 8252, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3927/2000001 [37:23<298:45:43,  1.86it/s]

buffer size = 8254, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3928/2000001 [37:24<297:21:02,  1.86it/s]

buffer size = 8256, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3929/2000001 [37:24<300:06:53,  1.85it/s]

buffer size = 8258, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3930/2000001 [37:25<300:58:55,  1.84it/s]

buffer size = 8260, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3931/2000001 [37:25<302:10:51,  1.83it/s]

buffer size = 8262, epsilon = 0.09804
mean_reward :  0.0


  0%|          | 3932/2000001 [37:26<336:11:51,  1.65it/s]

buffer size = 8264, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3933/2000001 [37:27<364:22:38,  1.52it/s]

buffer size = 8266, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3934/2000001 [37:28<396:36:56,  1.40it/s]

buffer size = 8268, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3935/2000001 [37:29<393:40:03,  1.41it/s]

buffer size = 8270, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3936/2000001 [37:29<364:00:23,  1.52it/s]

buffer size = 8272, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3937/2000001 [37:30<346:37:15,  1.60it/s]

buffer size = 8274, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3938/2000001 [37:30<330:50:47,  1.68it/s]

buffer size = 8276, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3939/2000001 [37:31<322:12:39,  1.72it/s]

buffer size = 8278, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3940/2000001 [37:31<314:12:49,  1.76it/s]

buffer size = 8280, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3941/2000001 [37:32<312:00:37,  1.78it/s]

buffer size = 8282, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3942/2000001 [37:32<309:38:22,  1.79it/s]

buffer size = 8284, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3943/2000001 [37:33<309:03:07,  1.79it/s]

buffer size = 8286, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3944/2000001 [37:33<305:32:28,  1.81it/s]

buffer size = 8288, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3945/2000001 [37:34<303:46:28,  1.83it/s]

buffer size = 8290, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3946/2000001 [37:35<303:58:08,  1.82it/s]

buffer size = 8292, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3947/2000001 [37:35<304:58:53,  1.82it/s]

buffer size = 8294, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3948/2000001 [37:36<301:39:56,  1.84it/s]

buffer size = 8296, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3949/2000001 [37:36<304:13:57,  1.82it/s]

buffer size = 8298, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3950/2000001 [37:37<302:16:22,  1.83it/s]

buffer size = 8300, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3951/2000001 [37:37<302:45:20,  1.83it/s]

buffer size = 8302, epsilon = 0.09803
mean_reward :  0.0


  0%|          | 3952/2000001 [37:38<298:57:10,  1.85it/s]

buffer size = 8304, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3953/2000001 [37:38<304:02:07,  1.82it/s]

buffer size = 8306, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3954/2000001 [37:39<352:37:16,  1.57it/s]

buffer size = 8308, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3955/2000001 [37:40<384:21:55,  1.44it/s]

buffer size = 8310, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3956/2000001 [37:41<408:36:06,  1.36it/s]

buffer size = 8312, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3957/2000001 [37:41<381:58:28,  1.45it/s]

buffer size = 8314, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3958/2000001 [37:42<358:18:30,  1.55it/s]

buffer size = 8316, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3959/2000001 [37:43<341:23:07,  1.62it/s]

buffer size = 8318, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3960/2000001 [37:43<329:30:36,  1.68it/s]

buffer size = 8320, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3961/2000001 [37:44<323:24:07,  1.71it/s]

buffer size = 8322, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3962/2000001 [37:44<316:24:20,  1.75it/s]

buffer size = 8324, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3963/2000001 [37:45<312:31:49,  1.77it/s]

buffer size = 8326, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3964/2000001 [37:45<309:33:27,  1.79it/s]

buffer size = 8328, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3965/2000001 [37:46<309:41:21,  1.79it/s]

buffer size = 8330, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3966/2000001 [37:46<305:16:45,  1.82it/s]

buffer size = 8332, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3967/2000001 [37:47<304:11:20,  1.82it/s]

buffer size = 8334, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3968/2000001 [37:47<301:53:36,  1.84it/s]

buffer size = 8336, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3969/2000001 [37:48<301:25:29,  1.84it/s]

buffer size = 8338, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3970/2000001 [37:48<301:47:12,  1.84it/s]

buffer size = 8340, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3971/2000001 [37:49<300:57:06,  1.84it/s]

buffer size = 8342, epsilon = 0.09802
mean_reward :  0.0


  0%|          | 3972/2000001 [37:50<304:24:15,  1.82it/s]

buffer size = 8344, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3973/2000001 [37:50<302:26:24,  1.83it/s]

buffer size = 8346, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3974/2000001 [37:51<306:05:14,  1.81it/s]

buffer size = 8348, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3975/2000001 [37:51<340:20:16,  1.63it/s]

buffer size = 8350, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3976/2000001 [37:52<372:14:03,  1.49it/s]

buffer size = 8352, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3977/2000001 [37:53<388:59:47,  1.43it/s]

buffer size = 8354, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3978/2000001 [37:54<399:53:48,  1.39it/s]

buffer size = 8356, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3979/2000001 [37:54<370:05:29,  1.50it/s]

buffer size = 8358, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3980/2000001 [37:55<353:04:34,  1.57it/s]

buffer size = 8360, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3981/2000001 [37:55<335:48:52,  1.65it/s]

buffer size = 8362, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3982/2000001 [37:56<326:21:47,  1.70it/s]

buffer size = 8364, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3983/2000001 [37:57<317:30:13,  1.75it/s]

buffer size = 8366, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3984/2000001 [37:57<315:04:26,  1.76it/s]

buffer size = 8368, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3985/2000001 [37:58<311:15:34,  1.78it/s]

buffer size = 8370, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3986/2000001 [37:58<310:23:24,  1.79it/s]

buffer size = 8372, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3987/2000001 [37:59<309:09:34,  1.79it/s]

buffer size = 8374, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3988/2000001 [37:59<309:57:21,  1.79it/s]

buffer size = 8376, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3989/2000001 [38:00<304:21:02,  1.82it/s]

buffer size = 8378, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3990/2000001 [38:00<305:54:48,  1.81it/s]

buffer size = 8380, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3991/2000001 [38:01<304:06:53,  1.82it/s]

buffer size = 8382, epsilon = 0.09801
mean_reward :  0.0


  0%|          | 3992/2000001 [38:01<304:09:35,  1.82it/s]

buffer size = 8384, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 3993/2000001 [38:02<305:01:46,  1.82it/s]

buffer size = 8386, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 3994/2000001 [38:03<306:14:52,  1.81it/s]

buffer size = 8388, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 3995/2000001 [38:03<306:30:52,  1.81it/s]

buffer size = 8390, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 3996/2000001 [38:04<306:38:07,  1.81it/s]

buffer size = 8392, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 3997/2000001 [38:05<351:51:39,  1.58it/s]

buffer size = 8394, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 3998/2000001 [38:05<393:35:20,  1.41it/s]

buffer size = 8396, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 3999/2000001 [38:06<414:25:03,  1.34it/s]

buffer size = 8398, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4000/2000001 [38:07<388:47:08,  1.43it/s]

buffer size = 8400, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4001/2000001 [38:07<365:42:54,  1.52it/s]

buffer size = 8402, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4002/2000001 [38:08<347:34:34,  1.60it/s]

buffer size = 8404, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4003/2000001 [38:09<336:57:52,  1.65it/s]

buffer size = 8406, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4004/2000001 [38:09<329:14:43,  1.68it/s]

buffer size = 8408, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4005/2000001 [38:10<322:13:31,  1.72it/s]

buffer size = 8410, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4006/2000001 [38:10<316:14:01,  1.75it/s]

buffer size = 8412, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4007/2000001 [38:11<313:33:55,  1.77it/s]

buffer size = 8414, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4008/2000001 [38:11<307:27:53,  1.80it/s]

buffer size = 8416, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4009/2000001 [38:12<305:41:13,  1.81it/s]

buffer size = 8418, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4010/2000001 [38:12<305:23:23,  1.82it/s]

buffer size = 8420, epsilon = 0.09800
mean_reward :  0.0


  0%|          | 4011/2000001 [38:13<304:35:19,  1.82it/s]

buffer size = 8422, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4012/2000001 [38:13<304:10:06,  1.82it/s]

buffer size = 8424, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4013/2000001 [38:14<306:22:46,  1.81it/s]

buffer size = 8426, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4014/2000001 [38:15<306:18:05,  1.81it/s]

buffer size = 8428, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4015/2000001 [38:15<307:43:51,  1.80it/s]

buffer size = 8430, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4016/2000001 [38:16<304:55:41,  1.82it/s]

buffer size = 8432, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4017/2000001 [38:16<306:13:53,  1.81it/s]

buffer size = 8434, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4018/2000001 [38:18<431:15:55,  1.29it/s]

buffer size = 8436, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4019/2000001 [38:18<437:41:08,  1.27it/s]

buffer size = 8438, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4020/2000001 [38:19<425:54:18,  1.30it/s]

buffer size = 8440, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4021/2000001 [38:20<391:16:13,  1.42it/s]

buffer size = 8442, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4022/2000001 [38:20<366:17:41,  1.51it/s]

buffer size = 8444, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4023/2000001 [38:21<348:50:23,  1.59it/s]

buffer size = 8446, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4024/2000001 [38:21<337:05:02,  1.64it/s]

buffer size = 8448, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4025/2000001 [38:22<325:40:09,  1.70it/s]

buffer size = 8450, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4026/2000001 [38:22<319:05:24,  1.74it/s]

buffer size = 8452, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4027/2000001 [38:23<312:29:23,  1.77it/s]

buffer size = 8454, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4028/2000001 [38:23<310:20:20,  1.79it/s]

buffer size = 8456, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4029/2000001 [38:24<307:56:27,  1.80it/s]

buffer size = 8458, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4030/2000001 [38:25<309:14:53,  1.79it/s]

buffer size = 8460, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4031/2000001 [38:25<307:25:52,  1.80it/s]

buffer size = 8462, epsilon = 0.09799
mean_reward :  0.0


  0%|          | 4032/2000001 [38:26<308:37:35,  1.80it/s]

buffer size = 8464, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4033/2000001 [38:26<305:43:34,  1.81it/s]

buffer size = 8466, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4034/2000001 [38:27<305:41:24,  1.81it/s]

buffer size = 8468, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4035/2000001 [38:27<305:09:55,  1.82it/s]

buffer size = 8470, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4036/2000001 [38:28<306:52:56,  1.81it/s]

buffer size = 8472, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4037/2000001 [38:28<306:56:31,  1.81it/s]

buffer size = 8474, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4038/2000001 [38:29<309:19:31,  1.79it/s]

buffer size = 8476, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4039/2000001 [38:30<353:19:28,  1.57it/s]

buffer size = 8478, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4040/2000001 [38:31<377:09:17,  1.47it/s]

buffer size = 8480, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4041/2000001 [38:31<399:50:30,  1.39it/s]

buffer size = 8482, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4042/2000001 [38:32<393:56:08,  1.41it/s]

buffer size = 8484, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4043/2000001 [38:33<363:06:17,  1.53it/s]

buffer size = 8486, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4044/2000001 [38:33<342:57:35,  1.62it/s]

buffer size = 8488, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4045/2000001 [38:34<327:51:54,  1.69it/s]

buffer size = 8490, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4046/2000001 [38:34<320:26:18,  1.73it/s]

buffer size = 8492, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4047/2000001 [38:35<318:26:35,  1.74it/s]

buffer size = 8494, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4048/2000001 [38:35<310:47:51,  1.78it/s]

buffer size = 8496, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4049/2000001 [38:36<311:59:44,  1.78it/s]

buffer size = 8498, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4050/2000001 [38:36<309:54:03,  1.79it/s]

buffer size = 8500, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4051/2000001 [38:37<309:17:00,  1.79it/s]

buffer size = 8502, epsilon = 0.09798
mean_reward :  0.0


  0%|          | 4052/2000001 [38:38<307:09:30,  1.81it/s]

buffer size = 8504, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4053/2000001 [38:38<307:15:21,  1.80it/s]

buffer size = 8506, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4054/2000001 [38:39<304:21:24,  1.82it/s]

buffer size = 8508, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4055/2000001 [38:39<304:08:16,  1.82it/s]

buffer size = 8510, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4056/2000001 [38:40<301:41:47,  1.84it/s]

buffer size = 8512, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4057/2000001 [38:40<303:10:10,  1.83it/s]

buffer size = 8514, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4058/2000001 [38:41<303:36:22,  1.83it/s]

buffer size = 8516, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4059/2000001 [38:41<305:01:44,  1.82it/s]

buffer size = 8518, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4060/2000001 [38:42<304:36:58,  1.82it/s]

buffer size = 8520, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4061/2000001 [38:43<348:37:41,  1.59it/s]

buffer size = 8522, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4062/2000001 [38:44<386:31:40,  1.43it/s]

buffer size = 8524, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4063/2000001 [38:44<416:09:13,  1.33it/s]

buffer size = 8526, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4064/2000001 [38:45<386:05:40,  1.44it/s]

buffer size = 8528, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4065/2000001 [38:46<361:35:17,  1.53it/s]

buffer size = 8530, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4066/2000001 [38:46<343:42:22,  1.61it/s]

buffer size = 8532, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4067/2000001 [38:47<332:57:37,  1.67it/s]

buffer size = 8534, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4068/2000001 [38:47<322:11:02,  1.72it/s]

buffer size = 8536, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4069/2000001 [38:48<317:32:09,  1.75it/s]

buffer size = 8538, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4070/2000001 [38:48<313:29:14,  1.77it/s]

buffer size = 8540, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4071/2000001 [38:49<313:39:20,  1.77it/s]

buffer size = 8542, epsilon = 0.09797
mean_reward :  0.0


  0%|          | 4072/2000001 [38:49<311:54:03,  1.78it/s]

buffer size = 8544, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4073/2000001 [38:50<309:57:50,  1.79it/s]

buffer size = 8546, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4074/2000001 [38:51<308:00:15,  1.80it/s]

buffer size = 8548, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4075/2000001 [38:51<309:59:21,  1.79it/s]

buffer size = 8550, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4076/2000001 [38:52<306:28:35,  1.81it/s]

buffer size = 8552, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4077/2000001 [38:52<306:03:24,  1.81it/s]

buffer size = 8554, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4078/2000001 [38:53<305:39:05,  1.81it/s]

buffer size = 8556, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4079/2000001 [38:53<306:09:26,  1.81it/s]

buffer size = 8558, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4080/2000001 [38:54<308:08:06,  1.80it/s]

buffer size = 8560, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4081/2000001 [38:54<307:53:10,  1.80it/s]

buffer size = 8562, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4082/2000001 [38:55<347:26:02,  1.60it/s]

buffer size = 8564, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4083/2000001 [38:56<377:58:12,  1.47it/s]

buffer size = 8566, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4084/2000001 [38:57<401:13:08,  1.38it/s]

buffer size = 8568, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4085/2000001 [38:58<398:53:06,  1.39it/s]

buffer size = 8570, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4086/2000001 [38:58<372:29:27,  1.49it/s]

buffer size = 8572, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4087/2000001 [38:59<353:14:06,  1.57it/s]

buffer size = 8574, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4088/2000001 [38:59<340:47:04,  1.63it/s]

buffer size = 8576, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4089/2000001 [39:00<327:43:00,  1.69it/s]

buffer size = 8578, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4090/2000001 [39:00<321:40:44,  1.72it/s]

buffer size = 8580, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4091/2000001 [39:01<315:58:29,  1.75it/s]

buffer size = 8582, epsilon = 0.09796
mean_reward :  0.0


  0%|          | 4092/2000001 [39:01<313:21:05,  1.77it/s]

buffer size = 8584, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4093/2000001 [39:02<310:07:27,  1.79it/s]

buffer size = 8586, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4094/2000001 [39:03<309:24:08,  1.79it/s]

buffer size = 8588, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4095/2000001 [39:03<309:07:51,  1.79it/s]

buffer size = 8590, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4096/2000001 [39:04<306:59:24,  1.81it/s]

buffer size = 8592, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4097/2000001 [39:04<306:50:37,  1.81it/s]

buffer size = 8594, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4098/2000001 [39:05<307:10:53,  1.80it/s]

buffer size = 8596, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4099/2000001 [39:05<308:30:42,  1.80it/s]

buffer size = 8598, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4100/2000001 [39:06<306:17:28,  1.81it/s]

buffer size = 8600, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4101/2000001 [39:06<310:03:43,  1.79it/s]

buffer size = 8602, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4102/2000001 [39:07<306:39:55,  1.81it/s]

buffer size = 8604, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4103/2000001 [39:08<318:09:10,  1.74it/s]

buffer size = 8606, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4104/2000001 [39:08<358:26:23,  1.55it/s]

buffer size = 8608, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4105/2000001 [39:09<381:38:49,  1.45it/s]

buffer size = 8610, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4106/2000001 [39:10<403:01:23,  1.38it/s]

buffer size = 8612, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4107/2000001 [39:11<391:11:54,  1.42it/s]

buffer size = 8614, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4108/2000001 [39:11<365:22:11,  1.52it/s]

buffer size = 8616, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4109/2000001 [39:12<349:09:01,  1.59it/s]

buffer size = 8618, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4110/2000001 [39:12<334:55:16,  1.66it/s]

buffer size = 8620, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4111/2000001 [39:13<326:33:29,  1.70it/s]

buffer size = 8622, epsilon = 0.09795
mean_reward :  0.0


  0%|          | 4112/2000001 [39:13<319:23:54,  1.74it/s]

buffer size = 8624, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4113/2000001 [39:14<316:50:14,  1.75it/s]

buffer size = 8626, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4114/2000001 [39:14<309:30:30,  1.79it/s]

buffer size = 8628, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4115/2000001 [39:15<309:03:09,  1.79it/s]

buffer size = 8630, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4116/2000001 [39:16<307:49:46,  1.80it/s]

buffer size = 8632, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4117/2000001 [39:16<309:55:16,  1.79it/s]

buffer size = 8634, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4118/2000001 [39:17<305:28:26,  1.81it/s]

buffer size = 8636, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4119/2000001 [39:17<310:03:01,  1.79it/s]

buffer size = 8638, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4120/2000001 [39:18<304:44:19,  1.82it/s]

buffer size = 8640, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4121/2000001 [39:18<307:38:58,  1.80it/s]

buffer size = 8642, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4122/2000001 [39:19<308:01:41,  1.80it/s]

buffer size = 8644, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4123/2000001 [39:19<308:44:43,  1.80it/s]

buffer size = 8646, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4124/2000001 [39:20<308:01:38,  1.80it/s]

buffer size = 8648, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4125/2000001 [39:21<324:51:01,  1.71it/s]

buffer size = 8650, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4126/2000001 [39:22<365:32:00,  1.52it/s]

buffer size = 8652, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4127/2000001 [39:22<400:56:25,  1.38it/s]

buffer size = 8654, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4128/2000001 [39:23<411:16:26,  1.35it/s]

buffer size = 8656, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4129/2000001 [39:24<380:00:32,  1.46it/s]

buffer size = 8658, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4130/2000001 [39:24<360:28:10,  1.54it/s]

buffer size = 8660, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4131/2000001 [39:25<343:21:02,  1.61it/s]

buffer size = 8662, epsilon = 0.09794
mean_reward :  0.0


  0%|          | 4132/2000001 [39:25<333:11:48,  1.66it/s]

buffer size = 8664, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4133/2000001 [39:26<322:43:45,  1.72it/s]

buffer size = 8666, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4134/2000001 [39:27<318:13:26,  1.74it/s]

buffer size = 8668, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4135/2000001 [39:27<324:59:53,  1.71it/s]

buffer size = 8670, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4136/2000001 [39:28<322:03:34,  1.72it/s]

buffer size = 8672, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4137/2000001 [39:28<315:56:23,  1.75it/s]

buffer size = 8674, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4138/2000001 [39:29<316:02:13,  1.75it/s]

buffer size = 8676, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4139/2000001 [39:29<310:49:46,  1.78it/s]

buffer size = 8678, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4140/2000001 [39:30<309:28:51,  1.79it/s]

buffer size = 8680, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4141/2000001 [39:30<308:25:00,  1.80it/s]

buffer size = 8682, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4142/2000001 [39:31<309:35:56,  1.79it/s]

buffer size = 8684, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4143/2000001 [39:32<310:16:09,  1.79it/s]

buffer size = 8686, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4144/2000001 [39:32<308:41:45,  1.80it/s]

buffer size = 8688, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4145/2000001 [39:33<309:54:20,  1.79it/s]

buffer size = 8690, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4146/2000001 [39:33<313:52:34,  1.77it/s]

buffer size = 8692, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4147/2000001 [39:34<355:03:45,  1.56it/s]

buffer size = 8694, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4148/2000001 [39:35<379:53:17,  1.46it/s]

buffer size = 8696, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4149/2000001 [39:36<402:20:19,  1.38it/s]

buffer size = 8698, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4150/2000001 [39:36<389:34:21,  1.42it/s]

buffer size = 8700, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4151/2000001 [39:37<366:27:47,  1.51it/s]

buffer size = 8702, epsilon = 0.09793
mean_reward :  0.0


  0%|          | 4152/2000001 [39:37<346:48:45,  1.60it/s]

buffer size = 8704, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4153/2000001 [39:38<336:44:03,  1.65it/s]

buffer size = 8706, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4154/2000001 [39:39<324:11:27,  1.71it/s]

buffer size = 8708, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4155/2000001 [39:39<319:08:14,  1.74it/s]

buffer size = 8710, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4156/2000001 [39:40<310:33:40,  1.79it/s]

buffer size = 8712, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4157/2000001 [39:40<309:00:45,  1.79it/s]

buffer size = 8714, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4158/2000001 [39:41<304:28:47,  1.82it/s]

buffer size = 8716, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4159/2000001 [39:41<306:04:58,  1.81it/s]

buffer size = 8718, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4160/2000001 [39:42<306:05:20,  1.81it/s]

buffer size = 8720, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4161/2000001 [39:42<307:06:25,  1.81it/s]

buffer size = 8722, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4162/2000001 [39:43<302:44:27,  1.83it/s]

buffer size = 8724, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4163/2000001 [39:43<305:33:08,  1.81it/s]

buffer size = 8726, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4164/2000001 [39:44<307:49:24,  1.80it/s]

buffer size = 8728, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4165/2000001 [39:45<307:45:06,  1.80it/s]

buffer size = 8730, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4166/2000001 [39:45<307:35:08,  1.80it/s]

buffer size = 8732, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4167/2000001 [39:46<307:09:14,  1.80it/s]

buffer size = 8734, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4168/2000001 [39:46<317:52:37,  1.74it/s]

buffer size = 8736, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4169/2000001 [39:47<356:54:29,  1.55it/s]

buffer size = 8738, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4170/2000001 [39:48<382:39:32,  1.45it/s]

buffer size = 8740, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4171/2000001 [39:49<408:24:54,  1.36it/s]

buffer size = 8742, epsilon = 0.09792
mean_reward :  0.0


  0%|          | 4172/2000001 [39:49<387:42:52,  1.43it/s]

buffer size = 8744, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4173/2000001 [39:50<361:23:28,  1.53it/s]

buffer size = 8746, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4174/2000001 [39:50<344:24:17,  1.61it/s]

buffer size = 8748, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4175/2000001 [39:51<336:09:15,  1.65it/s]

buffer size = 8750, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4176/2000001 [39:52<329:39:23,  1.68it/s]

buffer size = 8752, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4177/2000001 [39:52<322:13:54,  1.72it/s]

buffer size = 8754, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4178/2000001 [39:53<318:59:08,  1.74it/s]

buffer size = 8756, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4179/2000001 [39:53<313:21:38,  1.77it/s]

buffer size = 8758, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4180/2000001 [39:54<312:17:54,  1.78it/s]

buffer size = 8760, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4181/2000001 [39:54<308:56:23,  1.79it/s]

buffer size = 8762, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4182/2000001 [39:55<308:58:59,  1.79it/s]

buffer size = 8764, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4183/2000001 [39:55<306:22:29,  1.81it/s]

buffer size = 8766, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4184/2000001 [39:56<307:20:11,  1.80it/s]

buffer size = 8768, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4185/2000001 [39:57<305:36:24,  1.81it/s]

buffer size = 8770, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4186/2000001 [39:57<309:58:17,  1.79it/s]

buffer size = 8772, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4187/2000001 [39:58<308:31:56,  1.80it/s]

buffer size = 8774, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4188/2000001 [39:58<310:07:51,  1.79it/s]

buffer size = 8776, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4189/2000001 [39:59<310:53:22,  1.78it/s]

buffer size = 8778, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4190/2000001 [40:00<345:24:21,  1.61it/s]

buffer size = 8780, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4191/2000001 [40:00<383:00:47,  1.45it/s]

buffer size = 8782, epsilon = 0.09791
mean_reward :  0.0


  0%|          | 4192/2000001 [40:01<410:10:19,  1.35it/s]

buffer size = 8784, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4193/2000001 [40:02<395:23:21,  1.40it/s]

buffer size = 8786, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4194/2000001 [40:03<370:00:10,  1.50it/s]

buffer size = 8788, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4195/2000001 [40:03<353:30:12,  1.57it/s]

buffer size = 8790, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4196/2000001 [40:04<338:35:39,  1.64it/s]

buffer size = 8792, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4197/2000001 [40:04<331:47:05,  1.67it/s]

buffer size = 8794, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4198/2000001 [40:05<325:41:13,  1.70it/s]

buffer size = 8796, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4199/2000001 [40:05<319:58:21,  1.73it/s]

buffer size = 8798, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4200/2000001 [40:06<315:18:11,  1.76it/s]

buffer size = 8800, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4201/2000001 [40:06<313:32:11,  1.77it/s]

buffer size = 8802, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4202/2000001 [40:07<314:59:55,  1.76it/s]

buffer size = 8804, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4203/2000001 [40:08<316:24:40,  1.75it/s]

buffer size = 8806, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4204/2000001 [40:08<311:45:48,  1.78it/s]

buffer size = 8808, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4205/2000001 [40:09<309:39:16,  1.79it/s]

buffer size = 8810, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4206/2000001 [40:09<309:37:17,  1.79it/s]

buffer size = 8812, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4207/2000001 [40:10<310:27:04,  1.79it/s]

buffer size = 8814, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4208/2000001 [40:10<308:41:41,  1.80it/s]

buffer size = 8816, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4209/2000001 [40:11<307:44:30,  1.80it/s]

buffer size = 8818, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4210/2000001 [40:11<305:18:43,  1.82it/s]

buffer size = 8820, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4211/2000001 [40:12<337:08:41,  1.64it/s]

buffer size = 8822, epsilon = 0.09790
mean_reward :  0.0


  0%|          | 4212/2000001 [40:13<372:14:03,  1.49it/s]

buffer size = 8824, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4213/2000001 [40:14<405:23:07,  1.37it/s]

buffer size = 8826, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4214/2000001 [40:15<393:24:17,  1.41it/s]

buffer size = 8828, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4215/2000001 [40:15<370:39:02,  1.50it/s]

buffer size = 8830, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4216/2000001 [40:16<353:35:27,  1.57it/s]

buffer size = 8832, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4217/2000001 [40:16<339:19:07,  1.63it/s]

buffer size = 8834, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4218/2000001 [40:17<331:11:13,  1.67it/s]

buffer size = 8836, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4219/2000001 [40:17<323:32:15,  1.71it/s]

buffer size = 8838, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4220/2000001 [40:18<322:19:17,  1.72it/s]

buffer size = 8840, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4221/2000001 [40:18<320:10:08,  1.73it/s]

buffer size = 8842, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4222/2000001 [40:19<318:18:10,  1.74it/s]

buffer size = 8844, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4223/2000001 [40:20<318:16:34,  1.74it/s]

buffer size = 8846, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4224/2000001 [40:20<313:29:01,  1.77it/s]

buffer size = 8848, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4225/2000001 [40:21<308:40:03,  1.80it/s]

buffer size = 8850, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4226/2000001 [40:21<315:17:00,  1.76it/s]

buffer size = 8852, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4227/2000001 [40:22<313:06:56,  1.77it/s]

buffer size = 8854, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4228/2000001 [40:22<310:06:14,  1.79it/s]

buffer size = 8856, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4229/2000001 [40:23<308:20:04,  1.80it/s]

buffer size = 8858, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4230/2000001 [40:24<311:33:51,  1.78it/s]

buffer size = 8860, epsilon = 0.09789
mean_reward :  0.0


  0%|          | 4231/2000001 [40:24<313:32:34,  1.77it/s]

buffer size = 8862, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4232/2000001 [40:25<349:33:38,  1.59it/s]

buffer size = 8864, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4233/2000001 [40:26<384:40:54,  1.44it/s]

buffer size = 8866, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4234/2000001 [40:27<410:18:02,  1.35it/s]

buffer size = 8868, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4235/2000001 [40:27<400:53:09,  1.38it/s]

buffer size = 8870, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4236/2000001 [40:28<373:11:31,  1.49it/s]

buffer size = 8872, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4237/2000001 [40:28<356:23:09,  1.56it/s]

buffer size = 8874, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4238/2000001 [40:29<341:06:28,  1.63it/s]

buffer size = 8876, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4239/2000001 [40:29<331:49:54,  1.67it/s]

buffer size = 8878, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4240/2000001 [40:30<323:33:34,  1.71it/s]

buffer size = 8880, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4241/2000001 [40:31<318:15:38,  1.74it/s]

buffer size = 8882, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4242/2000001 [40:31<314:19:48,  1.76it/s]

buffer size = 8884, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4243/2000001 [40:32<313:35:07,  1.77it/s]

buffer size = 8886, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4244/2000001 [40:32<310:37:37,  1.78it/s]

buffer size = 8888, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4245/2000001 [40:33<310:00:25,  1.79it/s]

buffer size = 8890, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4246/2000001 [40:33<307:48:21,  1.80it/s]

buffer size = 8892, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4247/2000001 [40:34<310:28:31,  1.79it/s]

buffer size = 8894, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4248/2000001 [40:34<308:36:05,  1.80it/s]

buffer size = 8896, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4249/2000001 [40:35<308:14:30,  1.80it/s]

buffer size = 8898, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4250/2000001 [40:36<308:48:43,  1.80it/s]

buffer size = 8900, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4251/2000001 [40:36<307:56:10,  1.80it/s]

buffer size = 8902, epsilon = 0.09788
mean_reward :  0.0


  0%|          | 4252/2000001 [40:37<308:54:21,  1.79it/s]

buffer size = 8904, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4253/2000001 [40:37<333:23:06,  1.66it/s]

buffer size = 8906, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4254/2000001 [40:38<373:29:01,  1.48it/s]

buffer size = 8908, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4255/2000001 [40:39<397:07:15,  1.40it/s]

buffer size = 8910, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4256/2000001 [40:40<408:23:00,  1.36it/s]

buffer size = 8912, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4257/2000001 [40:40<378:59:37,  1.46it/s]

buffer size = 8914, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4258/2000001 [40:41<359:01:26,  1.54it/s]

buffer size = 8916, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4259/2000001 [40:42<346:54:27,  1.60it/s]

buffer size = 8918, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4260/2000001 [40:42<336:53:43,  1.65it/s]

buffer size = 8920, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4261/2000001 [40:43<328:45:11,  1.69it/s]

buffer size = 8922, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4262/2000001 [40:43<322:49:50,  1.72it/s]

buffer size = 8924, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4263/2000001 [40:44<316:01:52,  1.75it/s]

buffer size = 8926, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4264/2000001 [40:44<312:53:21,  1.77it/s]

buffer size = 8928, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4265/2000001 [40:45<310:06:48,  1.79it/s]

buffer size = 8930, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4266/2000001 [40:45<309:59:12,  1.79it/s]

buffer size = 8932, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4267/2000001 [40:46<312:38:05,  1.77it/s]

buffer size = 8934, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4268/2000001 [40:47<309:22:59,  1.79it/s]

buffer size = 8936, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4269/2000001 [40:47<306:40:52,  1.81it/s]

buffer size = 8938, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4270/2000001 [40:48<304:58:07,  1.82it/s]

buffer size = 8940, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4271/2000001 [40:48<308:20:22,  1.80it/s]

buffer size = 8942, epsilon = 0.09787
mean_reward :  0.0


  0%|          | 4272/2000001 [40:49<307:44:28,  1.80it/s]

buffer size = 8944, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4273/2000001 [40:49<310:23:22,  1.79it/s]

buffer size = 8946, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4274/2000001 [40:50<316:53:27,  1.75it/s]

buffer size = 8948, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4275/2000001 [40:51<373:07:16,  1.49it/s]

buffer size = 8950, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4276/2000001 [40:52<401:56:37,  1.38it/s]

buffer size = 8952, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4277/2000001 [40:52<413:15:15,  1.34it/s]

buffer size = 8954, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4278/2000001 [40:53<383:29:41,  1.45it/s]

buffer size = 8956, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4279/2000001 [40:54<361:18:54,  1.53it/s]

buffer size = 8958, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4280/2000001 [40:54<347:47:39,  1.59it/s]

buffer size = 8960, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4281/2000001 [40:55<337:33:18,  1.64it/s]

buffer size = 8962, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4282/2000001 [40:55<329:24:08,  1.68it/s]

buffer size = 8964, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4283/2000001 [40:56<323:24:50,  1.71it/s]

buffer size = 8966, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4284/2000001 [40:56<318:11:11,  1.74it/s]

buffer size = 8968, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4285/2000001 [40:57<315:00:59,  1.76it/s]

buffer size = 8970, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4286/2000001 [40:57<311:59:18,  1.78it/s]

buffer size = 8972, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4287/2000001 [40:58<311:55:28,  1.78it/s]

buffer size = 8974, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4288/2000001 [40:59<312:07:48,  1.78it/s]

buffer size = 8976, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4289/2000001 [40:59<312:26:48,  1.77it/s]

buffer size = 8978, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4290/2000001 [41:00<311:55:57,  1.78it/s]

buffer size = 8980, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4291/2000001 [41:00<311:04:26,  1.78it/s]

buffer size = 8982, epsilon = 0.09786
mean_reward :  0.0


  0%|          | 4292/2000001 [41:01<312:20:32,  1.77it/s]

buffer size = 8984, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4293/2000001 [41:01<308:53:19,  1.79it/s]

buffer size = 8986, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4294/2000001 [41:02<308:53:43,  1.79it/s]

buffer size = 8988, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4295/2000001 [41:03<325:14:19,  1.70it/s]

buffer size = 8990, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4296/2000001 [41:04<372:46:11,  1.49it/s]

buffer size = 8992, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4297/2000001 [41:04<401:52:08,  1.38it/s]

buffer size = 8994, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4298/2000001 [41:05<413:20:16,  1.34it/s]

buffer size = 8996, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4299/2000001 [41:06<382:06:55,  1.45it/s]

buffer size = 8998, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4300/2000001 [41:06<364:15:38,  1.52it/s]

buffer size = 9000, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4301/2000001 [41:07<347:39:05,  1.59it/s]

buffer size = 9002, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4302/2000001 [41:07<337:10:14,  1.64it/s]

buffer size = 9004, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4303/2000001 [41:08<326:10:03,  1.70it/s]

buffer size = 9006, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4304/2000001 [41:09<323:24:45,  1.71it/s]

buffer size = 9008, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4305/2000001 [41:09<321:24:47,  1.72it/s]

buffer size = 9010, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4306/2000001 [41:10<321:00:29,  1.73it/s]

buffer size = 9012, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4307/2000001 [41:10<316:36:11,  1.75it/s]

buffer size = 9014, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4308/2000001 [41:11<318:11:49,  1.74it/s]

buffer size = 9016, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4309/2000001 [41:11<316:27:22,  1.75it/s]

buffer size = 9018, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4310/2000001 [41:12<315:04:49,  1.76it/s]

buffer size = 9020, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4311/2000001 [41:12<314:20:32,  1.76it/s]

buffer size = 9022, epsilon = 0.09785
mean_reward :  0.0


  0%|          | 4312/2000001 [41:13<312:37:04,  1.77it/s]

buffer size = 9024, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4313/2000001 [41:14<315:04:52,  1.76it/s]

buffer size = 9026, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4314/2000001 [41:14<313:20:59,  1.77it/s]

buffer size = 9028, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4315/2000001 [41:15<314:02:19,  1.77it/s]

buffer size = 9030, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4316/2000001 [41:16<349:01:02,  1.59it/s]

buffer size = 9032, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4317/2000001 [41:16<370:28:23,  1.50it/s]

buffer size = 9034, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4318/2000001 [41:17<389:21:11,  1.42it/s]

buffer size = 9036, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4319/2000001 [41:18<409:16:29,  1.35it/s]

buffer size = 9038, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4320/2000001 [41:18<378:55:40,  1.46it/s]

buffer size = 9040, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4321/2000001 [41:19<359:31:41,  1.54it/s]

buffer size = 9042, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4322/2000001 [41:20<347:47:22,  1.59it/s]

buffer size = 9044, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4323/2000001 [41:20<336:12:05,  1.65it/s]

buffer size = 9046, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4324/2000001 [41:21<327:11:18,  1.69it/s]

buffer size = 9048, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4325/2000001 [41:21<324:09:33,  1.71it/s]

buffer size = 9050, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4326/2000001 [41:22<321:44:35,  1.72it/s]

buffer size = 9052, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4327/2000001 [41:22<317:42:02,  1.74it/s]

buffer size = 9054, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4328/2000001 [41:23<316:42:56,  1.75it/s]

buffer size = 9056, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4329/2000001 [41:24<312:19:57,  1.77it/s]

buffer size = 9058, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4330/2000001 [41:24<314:38:50,  1.76it/s]

buffer size = 9060, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4331/2000001 [41:25<313:35:24,  1.77it/s]

buffer size = 9062, epsilon = 0.09784
mean_reward :  0.0


  0%|          | 4332/2000001 [41:25<313:23:55,  1.77it/s]

buffer size = 9064, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4333/2000001 [41:26<313:26:18,  1.77it/s]

buffer size = 9066, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4334/2000001 [41:26<313:06:03,  1.77it/s]

buffer size = 9068, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4335/2000001 [41:27<312:52:13,  1.77it/s]

buffer size = 9070, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4336/2000001 [41:27<313:41:06,  1.77it/s]

buffer size = 9072, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4337/2000001 [41:28<326:11:11,  1.70it/s]

buffer size = 9074, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4338/2000001 [41:29<362:53:30,  1.53it/s]

buffer size = 9076, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4339/2000001 [41:30<384:47:13,  1.44it/s]

buffer size = 9078, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4340/2000001 [41:31<403:47:37,  1.37it/s]

buffer size = 9080, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4341/2000001 [41:31<397:27:03,  1.39it/s]

buffer size = 9082, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4342/2000001 [41:32<371:43:50,  1.49it/s]

buffer size = 9084, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4343/2000001 [41:32<350:59:14,  1.58it/s]

buffer size = 9086, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4344/2000001 [41:33<339:36:40,  1.63it/s]

buffer size = 9088, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4345/2000001 [41:33<329:47:21,  1.68it/s]

buffer size = 9090, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4346/2000001 [41:34<324:02:54,  1.71it/s]

buffer size = 9092, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4347/2000001 [41:35<322:52:14,  1.72it/s]

buffer size = 9094, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4348/2000001 [41:35<317:30:21,  1.75it/s]

buffer size = 9096, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4349/2000001 [41:36<312:16:30,  1.78it/s]

buffer size = 9098, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4350/2000001 [41:36<307:59:07,  1.80it/s]

buffer size = 9100, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4351/2000001 [41:37<307:01:34,  1.81it/s]

buffer size = 9102, epsilon = 0.09783
mean_reward :  0.0


  0%|          | 4352/2000001 [41:37<304:43:06,  1.82it/s]

buffer size = 9104, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4353/2000001 [41:38<304:57:40,  1.82it/s]

buffer size = 9106, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4354/2000001 [41:38<304:43:24,  1.82it/s]

buffer size = 9108, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4355/2000001 [41:39<309:28:12,  1.79it/s]

buffer size = 9110, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4356/2000001 [41:40<309:21:23,  1.79it/s]

buffer size = 9112, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4357/2000001 [41:40<310:30:44,  1.79it/s]

buffer size = 9114, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4358/2000001 [41:41<309:54:59,  1.79it/s]

buffer size = 9116, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4359/2000001 [41:41<329:41:19,  1.68it/s]

buffer size = 9118, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4360/2000001 [41:42<367:09:40,  1.51it/s]

buffer size = 9120, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4361/2000001 [41:43<387:16:47,  1.43it/s]

buffer size = 9122, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4362/2000001 [41:44<409:07:26,  1.35it/s]

buffer size = 9124, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4363/2000001 [41:44<390:23:59,  1.42it/s]

buffer size = 9126, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4364/2000001 [41:45<368:14:49,  1.51it/s]

buffer size = 9128, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4365/2000001 [41:46<352:07:27,  1.57it/s]

buffer size = 9130, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4366/2000001 [41:46<337:47:06,  1.64it/s]

buffer size = 9132, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4367/2000001 [41:47<329:58:37,  1.68it/s]

buffer size = 9134, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4368/2000001 [41:47<325:27:45,  1.70it/s]

buffer size = 9136, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4369/2000001 [41:48<320:14:38,  1.73it/s]

buffer size = 9138, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4370/2000001 [41:48<318:31:49,  1.74it/s]

buffer size = 9140, epsilon = 0.09782
mean_reward :  0.0


  0%|          | 4371/2000001 [41:49<316:06:08,  1.75it/s]

buffer size = 9142, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4372/2000001 [41:49<317:51:08,  1.74it/s]

buffer size = 9144, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4373/2000001 [41:50<313:29:24,  1.77it/s]

buffer size = 9146, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4374/2000001 [41:51<312:08:48,  1.78it/s]

buffer size = 9148, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4375/2000001 [41:51<309:49:32,  1.79it/s]

buffer size = 9150, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4376/2000001 [41:52<315:34:30,  1.76it/s]

buffer size = 9152, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4377/2000001 [41:52<314:31:30,  1.76it/s]

buffer size = 9154, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4378/2000001 [41:53<315:32:47,  1.76it/s]

buffer size = 9156, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4379/2000001 [41:53<312:07:19,  1.78it/s]

buffer size = 9158, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4380/2000001 [41:54<312:15:22,  1.78it/s]

buffer size = 9160, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4381/2000001 [41:55<362:55:44,  1.53it/s]

buffer size = 9162, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4382/2000001 [41:56<393:17:08,  1.41it/s]

buffer size = 9164, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4383/2000001 [41:57<418:14:28,  1.33it/s]

buffer size = 9166, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4384/2000001 [41:57<398:54:04,  1.39it/s]

buffer size = 9168, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4385/2000001 [41:58<372:54:36,  1.49it/s]

buffer size = 9170, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4386/2000001 [41:58<353:23:39,  1.57it/s]

buffer size = 9172, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4387/2000001 [41:59<342:41:08,  1.62it/s]

buffer size = 9174, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4388/2000001 [41:59<331:36:49,  1.67it/s]

buffer size = 9176, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4389/2000001 [42:00<325:03:25,  1.71it/s]

buffer size = 9178, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4390/2000001 [42:01<318:23:48,  1.74it/s]

buffer size = 9180, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4391/2000001 [42:01<316:17:44,  1.75it/s]

buffer size = 9182, epsilon = 0.09781
mean_reward :  0.0


  0%|          | 4392/2000001 [42:02<312:32:19,  1.77it/s]

buffer size = 9184, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4393/2000001 [42:02<315:19:23,  1.76it/s]

buffer size = 9186, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4394/2000001 [42:03<311:50:53,  1.78it/s]

buffer size = 9188, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4395/2000001 [42:03<311:19:09,  1.78it/s]

buffer size = 9190, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4396/2000001 [42:04<308:03:18,  1.80it/s]

buffer size = 9192, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4397/2000001 [42:04<307:34:45,  1.80it/s]

buffer size = 9194, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4398/2000001 [42:05<309:01:30,  1.79it/s]

buffer size = 9196, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4399/2000001 [42:06<309:40:08,  1.79it/s]

buffer size = 9198, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4400/2000001 [42:06<309:13:34,  1.79it/s]

buffer size = 9200, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4401/2000001 [42:07<308:37:39,  1.80it/s]

buffer size = 9202, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4402/2000001 [42:07<348:11:54,  1.59it/s]

buffer size = 9204, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4403/2000001 [42:08<387:18:57,  1.43it/s]

buffer size = 9206, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4404/2000001 [42:09<414:02:38,  1.34it/s]

buffer size = 9208, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4405/2000001 [42:10<401:09:26,  1.38it/s]

buffer size = 9210, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4406/2000001 [42:10<375:36:05,  1.48it/s]

buffer size = 9212, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4407/2000001 [42:11<355:12:46,  1.56it/s]

buffer size = 9214, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4408/2000001 [42:12<341:31:26,  1.62it/s]

buffer size = 9216, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4409/2000001 [42:12<331:18:00,  1.67it/s]

buffer size = 9218, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4410/2000001 [42:13<322:46:23,  1.72it/s]

buffer size = 9220, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4411/2000001 [42:13<315:22:06,  1.76it/s]

buffer size = 9222, epsilon = 0.09780
mean_reward :  0.0


  0%|          | 4412/2000001 [42:14<315:41:36,  1.76it/s]

buffer size = 9224, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4413/2000001 [42:14<313:10:00,  1.77it/s]

buffer size = 9226, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4414/2000001 [42:15<309:04:29,  1.79it/s]

buffer size = 9228, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4415/2000001 [42:15<310:23:37,  1.79it/s]

buffer size = 9230, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4416/2000001 [42:16<308:54:08,  1.79it/s]

buffer size = 9232, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4417/2000001 [42:17<311:19:37,  1.78it/s]

buffer size = 9234, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4418/2000001 [42:17<312:57:59,  1.77it/s]

buffer size = 9236, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4419/2000001 [42:18<312:55:46,  1.77it/s]

buffer size = 9238, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4420/2000001 [42:18<310:30:51,  1.79it/s]

buffer size = 9240, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4421/2000001 [42:19<311:59:24,  1.78it/s]

buffer size = 9242, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4422/2000001 [42:19<310:12:37,  1.79it/s]

buffer size = 9244, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4423/2000001 [42:20<342:02:15,  1.62it/s]

buffer size = 9246, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4424/2000001 [42:21<376:58:36,  1.47it/s]

buffer size = 9248, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4425/2000001 [42:22<396:26:29,  1.40it/s]

buffer size = 9250, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4426/2000001 [42:22<409:23:14,  1.35it/s]

buffer size = 9252, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4427/2000001 [42:23<380:50:31,  1.46it/s]

buffer size = 9254, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4428/2000001 [42:24<359:48:15,  1.54it/s]

buffer size = 9256, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4429/2000001 [42:24<347:04:08,  1.60it/s]

buffer size = 9258, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4430/2000001 [42:25<336:16:58,  1.65it/s]

buffer size = 9260, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4431/2000001 [42:25<327:01:56,  1.70it/s]

buffer size = 9262, epsilon = 0.09779
mean_reward :  0.0


  0%|          | 4432/2000001 [42:26<324:45:24,  1.71it/s]

buffer size = 9264, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4433/2000001 [42:26<318:51:20,  1.74it/s]

buffer size = 9266, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4434/2000001 [42:27<317:33:08,  1.75it/s]

buffer size = 9268, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4435/2000001 [42:28<314:20:20,  1.76it/s]

buffer size = 9270, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4436/2000001 [42:28<315:37:11,  1.76it/s]

buffer size = 9272, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4437/2000001 [42:29<315:09:01,  1.76it/s]

buffer size = 9274, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4438/2000001 [42:29<314:07:06,  1.76it/s]

buffer size = 9276, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4439/2000001 [42:30<312:20:50,  1.77it/s]

buffer size = 9278, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4440/2000001 [42:30<311:53:50,  1.78it/s]

buffer size = 9280, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4441/2000001 [42:31<310:34:17,  1.78it/s]

buffer size = 9282, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4442/2000001 [42:31<312:19:03,  1.77it/s]

buffer size = 9284, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4443/2000001 [42:32<314:29:43,  1.76it/s]

buffer size = 9286, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4444/2000001 [42:33<332:03:48,  1.67it/s]

buffer size = 9288, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4445/2000001 [42:34<365:18:36,  1.52it/s]

buffer size = 9290, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4446/2000001 [42:34<386:13:54,  1.44it/s]

buffer size = 9292, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4447/2000001 [42:35<410:44:28,  1.35it/s]

buffer size = 9294, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4448/2000001 [42:36<389:03:38,  1.42it/s]

buffer size = 9296, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4449/2000001 [42:36<366:28:52,  1.51it/s]

buffer size = 9298, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4450/2000001 [42:37<344:59:15,  1.61it/s]

buffer size = 9300, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4451/2000001 [42:37<335:21:57,  1.65it/s]

buffer size = 9302, epsilon = 0.09778
mean_reward :  0.0


  0%|          | 4452/2000001 [42:38<324:20:28,  1.71it/s]

buffer size = 9304, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4453/2000001 [42:39<317:18:34,  1.75it/s]

buffer size = 9306, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4454/2000001 [42:39<313:11:32,  1.77it/s]

buffer size = 9308, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4455/2000001 [42:40<310:42:46,  1.78it/s]

buffer size = 9310, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4456/2000001 [42:40<309:39:10,  1.79it/s]

buffer size = 9312, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4457/2000001 [42:41<313:09:25,  1.77it/s]

buffer size = 9314, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4458/2000001 [42:41<310:50:57,  1.78it/s]

buffer size = 9316, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4459/2000001 [42:42<309:09:30,  1.79it/s]

buffer size = 9318, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4460/2000001 [42:42<306:10:14,  1.81it/s]

buffer size = 9320, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4461/2000001 [42:43<305:07:55,  1.82it/s]

buffer size = 9322, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4462/2000001 [42:43<303:02:09,  1.83it/s]

buffer size = 9324, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4463/2000001 [42:44<304:04:16,  1.82it/s]

buffer size = 9326, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4464/2000001 [42:45<300:58:48,  1.84it/s]

buffer size = 9328, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4465/2000001 [42:45<302:59:54,  1.83it/s]

buffer size = 9330, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4466/2000001 [42:46<337:55:02,  1.64it/s]

buffer size = 9332, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4467/2000001 [42:47<362:27:59,  1.53it/s]

buffer size = 9334, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4468/2000001 [42:47<385:31:39,  1.44it/s]

buffer size = 9336, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4469/2000001 [42:48<408:36:52,  1.36it/s]

buffer size = 9338, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4470/2000001 [42:49<385:20:05,  1.44it/s]

buffer size = 9340, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4471/2000001 [42:49<365:07:01,  1.52it/s]

buffer size = 9342, epsilon = 0.09777
mean_reward :  0.0


  0%|          | 4472/2000001 [42:50<351:57:02,  1.57it/s]

buffer size = 9344, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4473/2000001 [42:51<340:17:07,  1.63it/s]

buffer size = 9346, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4474/2000001 [42:51<335:21:17,  1.65it/s]

buffer size = 9348, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4475/2000001 [42:52<330:40:29,  1.68it/s]

buffer size = 9350, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4476/2000001 [42:52<328:55:59,  1.69it/s]

buffer size = 9352, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4477/2000001 [42:53<322:19:33,  1.72it/s]

buffer size = 9354, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4478/2000001 [42:53<322:05:22,  1.72it/s]

buffer size = 9356, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4479/2000001 [42:54<322:28:55,  1.72it/s]

buffer size = 9358, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4480/2000001 [42:55<321:59:14,  1.72it/s]

buffer size = 9360, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4481/2000001 [42:55<318:48:06,  1.74it/s]

buffer size = 9362, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4482/2000001 [42:56<343:28:43,  1.61it/s]

buffer size = 9364, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4483/2000001 [42:56<338:00:46,  1.64it/s]

buffer size = 9366, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4484/2000001 [42:57<330:44:37,  1.68it/s]

buffer size = 9368, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4485/2000001 [42:58<325:27:25,  1.70it/s]

buffer size = 9370, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4486/2000001 [42:58<324:40:04,  1.71it/s]

buffer size = 9372, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4487/2000001 [42:59<355:43:13,  1.56it/s]

buffer size = 9374, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4488/2000001 [43:00<390:15:24,  1.42it/s]

buffer size = 9376, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4489/2000001 [43:01<416:53:12,  1.33it/s]

buffer size = 9378, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4490/2000001 [43:01<414:05:26,  1.34it/s]

buffer size = 9380, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4491/2000001 [43:02<385:56:02,  1.44it/s]

buffer size = 9382, epsilon = 0.09776
mean_reward :  0.0


  0%|          | 4492/2000001 [43:03<364:51:49,  1.52it/s]

buffer size = 9384, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4493/2000001 [43:03<349:00:04,  1.59it/s]

buffer size = 9386, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4494/2000001 [43:04<339:48:02,  1.63it/s]

buffer size = 9388, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4495/2000001 [43:04<333:55:27,  1.66it/s]

buffer size = 9390, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4496/2000001 [43:05<333:48:47,  1.66it/s]

buffer size = 9392, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4497/2000001 [43:05<324:23:01,  1.71it/s]

buffer size = 9394, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4498/2000001 [43:06<323:59:42,  1.71it/s]

buffer size = 9396, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4499/2000001 [43:07<322:17:01,  1.72it/s]

buffer size = 9398, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4500/2000001 [43:07<316:06:37,  1.75it/s]

buffer size = 9400, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4501/2000001 [43:08<313:24:20,  1.77it/s]

buffer size = 9402, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4502/2000001 [43:08<314:20:48,  1.76it/s]

buffer size = 9404, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4503/2000001 [43:09<314:09:16,  1.76it/s]

buffer size = 9406, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4504/2000001 [43:09<314:08:06,  1.76it/s]

buffer size = 9408, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4505/2000001 [43:10<316:39:24,  1.75it/s]

buffer size = 9410, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4506/2000001 [43:11<315:35:35,  1.76it/s]

buffer size = 9412, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4507/2000001 [43:11<317:24:59,  1.75it/s]

buffer size = 9414, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4508/2000001 [43:12<352:33:22,  1.57it/s]

buffer size = 9416, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4509/2000001 [43:13<384:45:04,  1.44it/s]

buffer size = 9418, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4510/2000001 [43:14<408:41:36,  1.36it/s]

buffer size = 9420, epsilon = 0.09775
mean_reward :  0.0


  0%|          | 4511/2000001 [43:14<407:02:07,  1.36it/s]

buffer size = 9422, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4512/2000001 [43:15<376:47:59,  1.47it/s]

buffer size = 9424, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4513/2000001 [43:15<358:29:54,  1.55it/s]

buffer size = 9426, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4514/2000001 [43:16<344:28:18,  1.61it/s]

buffer size = 9428, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4515/2000001 [43:17<335:58:27,  1.65it/s]

buffer size = 9430, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4516/2000001 [43:17<329:28:49,  1.68it/s]

buffer size = 9432, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4517/2000001 [43:18<323:36:37,  1.71it/s]

buffer size = 9434, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4518/2000001 [43:18<318:27:24,  1.74it/s]

buffer size = 9436, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4519/2000001 [43:19<314:06:36,  1.76it/s]

buffer size = 9438, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4520/2000001 [43:19<314:19:33,  1.76it/s]

buffer size = 9440, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4521/2000001 [43:20<316:33:15,  1.75it/s]

buffer size = 9442, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4522/2000001 [43:20<314:23:05,  1.76it/s]

buffer size = 9444, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4523/2000001 [43:21<313:14:01,  1.77it/s]

buffer size = 9446, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4524/2000001 [43:22<313:36:36,  1.77it/s]

buffer size = 9448, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4525/2000001 [43:22<318:05:23,  1.74it/s]

buffer size = 9450, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4526/2000001 [43:23<317:46:27,  1.74it/s]

buffer size = 9452, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4527/2000001 [43:23<313:43:56,  1.77it/s]

buffer size = 9454, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4528/2000001 [43:24<318:36:29,  1.74it/s]

buffer size = 9456, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4529/2000001 [43:25<349:04:44,  1.59it/s]

buffer size = 9458, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4530/2000001 [43:25<376:32:10,  1.47it/s]

buffer size = 9460, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4531/2000001 [43:26<400:05:10,  1.39it/s]

buffer size = 9462, epsilon = 0.09774
mean_reward :  0.0


  0%|          | 4532/2000001 [43:27<415:42:41,  1.33it/s]

buffer size = 9464, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4533/2000001 [43:28<384:41:36,  1.44it/s]

buffer size = 9466, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4534/2000001 [43:28<367:04:11,  1.51it/s]

buffer size = 9468, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4535/2000001 [43:29<352:25:31,  1.57it/s]

buffer size = 9470, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4536/2000001 [43:29<340:09:45,  1.63it/s]

buffer size = 9472, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4537/2000001 [43:30<332:26:16,  1.67it/s]

buffer size = 9474, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4538/2000001 [43:31<326:38:41,  1.70it/s]

buffer size = 9476, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4539/2000001 [43:31<324:05:01,  1.71it/s]

buffer size = 9478, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4540/2000001 [43:32<318:50:01,  1.74it/s]

buffer size = 9480, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4541/2000001 [43:32<322:56:48,  1.72it/s]

buffer size = 9482, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4542/2000001 [43:33<320:29:58,  1.73it/s]

buffer size = 9484, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4543/2000001 [43:33<320:51:39,  1.73it/s]

buffer size = 9486, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4544/2000001 [43:34<316:22:38,  1.75it/s]

buffer size = 9488, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4545/2000001 [43:35<318:14:02,  1.74it/s]

buffer size = 9490, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4546/2000001 [43:35<317:41:39,  1.74it/s]

buffer size = 9492, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4547/2000001 [43:36<313:56:57,  1.77it/s]

buffer size = 9494, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4548/2000001 [43:36<316:42:09,  1.75it/s]

buffer size = 9496, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4549/2000001 [43:37<315:19:02,  1.76it/s]

buffer size = 9498, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4550/2000001 [43:38<349:02:28,  1.59it/s]

buffer size = 9500, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4551/2000001 [43:38<379:23:51,  1.46it/s]

buffer size = 9502, epsilon = 0.09773
mean_reward :  0.0


  0%|          | 4552/2000001 [43:39<397:46:15,  1.39it/s]

buffer size = 9504, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4553/2000001 [43:40<420:07:08,  1.32it/s]

buffer size = 9506, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4554/2000001 [43:41<388:26:13,  1.43it/s]

buffer size = 9508, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4555/2000001 [43:41<364:14:31,  1.52it/s]

buffer size = 9510, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4556/2000001 [43:42<351:11:26,  1.58it/s]

buffer size = 9512, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4557/2000001 [43:42<339:54:58,  1.63it/s]

buffer size = 9514, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4558/2000001 [43:43<331:26:46,  1.67it/s]

buffer size = 9516, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4559/2000001 [43:43<323:16:21,  1.71it/s]

buffer size = 9518, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4560/2000001 [43:44<321:59:44,  1.72it/s]

buffer size = 9520, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4561/2000001 [43:45<317:03:32,  1.75it/s]

buffer size = 9522, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4562/2000001 [43:45<315:50:07,  1.75it/s]

buffer size = 9524, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4563/2000001 [43:46<315:09:49,  1.76it/s]

buffer size = 9526, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4564/2000001 [43:46<313:01:39,  1.77it/s]

buffer size = 9528, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4565/2000001 [43:47<311:14:15,  1.78it/s]

buffer size = 9530, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4566/2000001 [43:47<310:40:19,  1.78it/s]

buffer size = 9532, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4567/2000001 [43:48<309:54:12,  1.79it/s]

buffer size = 9534, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4568/2000001 [43:48<310:09:32,  1.79it/s]

buffer size = 9536, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4569/2000001 [43:49<313:37:11,  1.77it/s]

buffer size = 9538, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4570/2000001 [43:50<313:57:26,  1.77it/s]

buffer size = 9540, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4571/2000001 [43:50<337:33:49,  1.64it/s]

buffer size = 9542, epsilon = 0.09772
mean_reward :  0.0


  0%|          | 4572/2000001 [43:51<368:23:13,  1.50it/s]

buffer size = 9544, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4573/2000001 [43:52<390:11:29,  1.42it/s]

buffer size = 9546, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4574/2000001 [43:53<408:44:00,  1.36it/s]

buffer size = 9548, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4575/2000001 [43:53<384:10:41,  1.44it/s]

buffer size = 9550, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4576/2000001 [43:54<361:27:07,  1.53it/s]

buffer size = 9552, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4577/2000001 [43:54<348:28:17,  1.59it/s]

buffer size = 9554, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4578/2000001 [43:55<336:31:54,  1.65it/s]

buffer size = 9556, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4579/2000001 [43:56<328:18:23,  1.69it/s]

buffer size = 9558, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4580/2000001 [43:56<323:02:39,  1.72it/s]

buffer size = 9560, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4581/2000001 [43:57<320:15:46,  1.73it/s]

buffer size = 9562, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4582/2000001 [43:57<319:57:09,  1.73it/s]

buffer size = 9564, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4583/2000001 [43:58<318:02:49,  1.74it/s]

buffer size = 9566, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4584/2000001 [43:58<317:29:31,  1.75it/s]

buffer size = 9568, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4585/2000001 [43:59<314:07:30,  1.76it/s]

buffer size = 9570, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4586/2000001 [44:00<316:46:08,  1.75it/s]

buffer size = 9572, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4587/2000001 [44:00<316:30:41,  1.75it/s]

buffer size = 9574, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4588/2000001 [44:01<315:50:39,  1.75it/s]

buffer size = 9576, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4589/2000001 [44:01<313:34:48,  1.77it/s]

buffer size = 9578, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4590/2000001 [44:02<315:37:47,  1.76it/s]

buffer size = 9580, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4591/2000001 [44:02<317:17:00,  1.75it/s]

buffer size = 9582, epsilon = 0.09771
mean_reward :  0.0


  0%|          | 4592/2000001 [44:03<316:52:20,  1.75it/s]

buffer size = 9584, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4593/2000001 [44:04<356:04:59,  1.56it/s]

buffer size = 9586, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4594/2000001 [44:05<400:01:18,  1.39it/s]

buffer size = 9588, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4595/2000001 [44:06<419:56:52,  1.32it/s]

buffer size = 9590, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4596/2000001 [44:06<398:47:06,  1.39it/s]

buffer size = 9592, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4597/2000001 [44:07<378:00:53,  1.47it/s]

buffer size = 9594, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4598/2000001 [44:07<356:21:34,  1.56it/s]

buffer size = 9596, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4599/2000001 [44:08<341:57:36,  1.62it/s]

buffer size = 9598, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4600/2000001 [44:08<330:09:42,  1.68it/s]

buffer size = 9600, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4601/2000001 [44:09<329:04:09,  1.68it/s]

buffer size = 9602, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4602/2000001 [44:10<323:40:52,  1.71it/s]

buffer size = 9604, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4603/2000001 [44:10<322:52:28,  1.72it/s]

buffer size = 9606, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4604/2000001 [44:11<315:50:32,  1.75it/s]

buffer size = 9608, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4605/2000001 [44:11<316:00:47,  1.75it/s]

buffer size = 9610, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4606/2000001 [44:12<315:18:24,  1.76it/s]

buffer size = 9612, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4607/2000001 [44:12<313:45:14,  1.77it/s]

buffer size = 9614, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4608/2000001 [44:13<315:58:12,  1.75it/s]

buffer size = 9616, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4609/2000001 [44:13<312:44:23,  1.77it/s]

buffer size = 9618, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4610/2000001 [44:14<314:02:24,  1.76it/s]

buffer size = 9620, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4611/2000001 [44:15<312:43:27,  1.77it/s]

buffer size = 9622, epsilon = 0.09770
mean_reward :  0.0


  0%|          | 4612/2000001 [44:15<314:35:03,  1.76it/s]

buffer size = 9624, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4613/2000001 [44:16<313:16:29,  1.77it/s]

buffer size = 9626, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4614/2000001 [44:17<359:51:47,  1.54it/s]

buffer size = 9628, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4615/2000001 [44:17<390:46:37,  1.42it/s]

buffer size = 9630, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4616/2000001 [44:18<425:29:18,  1.30it/s]

buffer size = 9632, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4617/2000001 [44:19<406:10:18,  1.36it/s]

buffer size = 9634, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4618/2000001 [44:20<386:13:03,  1.44it/s]

buffer size = 9636, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4619/2000001 [44:20<364:51:56,  1.52it/s]

buffer size = 9638, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4620/2000001 [44:21<352:54:30,  1.57it/s]

buffer size = 9640, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4621/2000001 [44:21<341:32:07,  1.62it/s]

buffer size = 9642, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4622/2000001 [44:22<332:51:10,  1.67it/s]

buffer size = 9644, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4623/2000001 [44:22<331:42:46,  1.67it/s]

buffer size = 9646, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4624/2000001 [44:23<329:08:18,  1.68it/s]

buffer size = 9648, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4625/2000001 [44:24<326:28:26,  1.70it/s]

buffer size = 9650, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4626/2000001 [44:24<323:56:18,  1.71it/s]

buffer size = 9652, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4627/2000001 [44:25<324:16:17,  1.71it/s]

buffer size = 9654, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4628/2000001 [44:25<321:16:49,  1.73it/s]

buffer size = 9656, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4629/2000001 [44:26<317:27:57,  1.75it/s]

buffer size = 9658, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4630/2000001 [44:26<316:23:23,  1.75it/s]

buffer size = 9660, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4631/2000001 [44:27<319:21:00,  1.74it/s]

buffer size = 9662, epsilon = 0.09769
mean_reward :  0.0


  0%|          | 4632/2000001 [44:28<318:14:51,  1.74it/s]

buffer size = 9664, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4633/2000001 [44:28<320:06:14,  1.73it/s]

buffer size = 9666, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4634/2000001 [44:29<343:07:01,  1.62it/s]

buffer size = 9668, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4635/2000001 [44:30<379:37:28,  1.46it/s]

buffer size = 9670, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4636/2000001 [44:31<393:57:18,  1.41it/s]

buffer size = 9672, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4637/2000001 [44:31<415:50:18,  1.33it/s]

buffer size = 9674, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4638/2000001 [44:32<393:36:29,  1.41it/s]

buffer size = 9676, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4639/2000001 [44:33<370:24:14,  1.50it/s]

buffer size = 9678, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4640/2000001 [44:33<355:30:35,  1.56it/s]

buffer size = 9680, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4641/2000001 [44:34<341:51:12,  1.62it/s]

buffer size = 9682, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4642/2000001 [44:34<335:11:58,  1.65it/s]

buffer size = 9684, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4643/2000001 [44:35<328:29:34,  1.69it/s]

buffer size = 9686, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4644/2000001 [44:35<323:51:38,  1.71it/s]

buffer size = 9688, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4645/2000001 [44:36<321:24:35,  1.72it/s]

buffer size = 9690, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4646/2000001 [44:37<319:24:24,  1.74it/s]

buffer size = 9692, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4647/2000001 [44:37<315:12:22,  1.76it/s]

buffer size = 9694, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4648/2000001 [44:38<313:56:45,  1.77it/s]

buffer size = 9696, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4649/2000001 [44:38<316:35:29,  1.75it/s]

buffer size = 9698, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4650/2000001 [44:39<318:52:02,  1.74it/s]

buffer size = 9700, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4651/2000001 [44:39<323:25:36,  1.71it/s]

buffer size = 9702, epsilon = 0.09768
mean_reward :  0.0


  0%|          | 4652/2000001 [44:40<320:37:35,  1.73it/s]

buffer size = 9704, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4653/2000001 [44:41<321:33:20,  1.72it/s]

buffer size = 9706, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4654/2000001 [44:41<318:50:44,  1.74it/s]

buffer size = 9708, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4655/2000001 [44:42<339:48:50,  1.63it/s]

buffer size = 9710, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4656/2000001 [44:43<379:46:06,  1.46it/s]

buffer size = 9712, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4657/2000001 [44:44<397:37:46,  1.39it/s]

buffer size = 9714, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4658/2000001 [44:44<419:05:49,  1.32it/s]

buffer size = 9716, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4659/2000001 [44:45<387:15:48,  1.43it/s]

buffer size = 9718, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4660/2000001 [44:46<365:59:58,  1.51it/s]

buffer size = 9720, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4661/2000001 [44:46<354:17:56,  1.56it/s]

buffer size = 9722, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4662/2000001 [44:47<344:34:27,  1.61it/s]

buffer size = 9724, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4663/2000001 [44:47<334:04:04,  1.66it/s]

buffer size = 9726, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4664/2000001 [44:48<332:51:54,  1.67it/s]

buffer size = 9728, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4665/2000001 [44:48<326:02:35,  1.70it/s]

buffer size = 9730, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4666/2000001 [44:49<326:15:57,  1.70it/s]

buffer size = 9732, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4667/2000001 [44:50<325:12:22,  1.70it/s]

buffer size = 9734, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4668/2000001 [44:50<323:32:40,  1.71it/s]

buffer size = 9736, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4669/2000001 [44:51<318:35:24,  1.74it/s]

buffer size = 9738, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4670/2000001 [44:51<316:33:25,  1.75it/s]

buffer size = 9740, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4671/2000001 [44:52<316:08:59,  1.75it/s]

buffer size = 9742, epsilon = 0.09767
mean_reward :  0.0


  0%|          | 4672/2000001 [44:52<319:45:13,  1.73it/s]

buffer size = 9744, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4673/2000001 [44:53<318:59:12,  1.74it/s]

buffer size = 9746, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4674/2000001 [44:54<317:39:28,  1.74it/s]

buffer size = 9748, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4675/2000001 [44:54<319:02:35,  1.74it/s]

buffer size = 9750, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4676/2000001 [44:55<354:46:36,  1.56it/s]

buffer size = 9752, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4677/2000001 [44:56<382:59:41,  1.45it/s]

buffer size = 9754, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4678/2000001 [44:57<405:52:41,  1.37it/s]

buffer size = 9756, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4679/2000001 [44:57<406:03:55,  1.36it/s]

buffer size = 9758, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4680/2000001 [44:58<376:10:53,  1.47it/s]

buffer size = 9760, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4681/2000001 [44:58<354:10:24,  1.56it/s]

buffer size = 9762, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4682/2000001 [44:59<337:48:45,  1.64it/s]

buffer size = 9764, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4683/2000001 [45:00<333:12:29,  1.66it/s]

buffer size = 9766, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4684/2000001 [45:00<329:37:26,  1.68it/s]

buffer size = 9768, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4685/2000001 [45:01<325:50:45,  1.70it/s]

buffer size = 9770, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4686/2000001 [45:01<321:19:43,  1.72it/s]

buffer size = 9772, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4687/2000001 [45:02<320:11:15,  1.73it/s]

buffer size = 9774, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4688/2000001 [45:02<321:57:32,  1.72it/s]

buffer size = 9776, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4689/2000001 [45:03<321:38:26,  1.72it/s]

buffer size = 9778, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4690/2000001 [45:04<318:22:11,  1.74it/s]

buffer size = 9780, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4691/2000001 [45:04<315:34:01,  1.76it/s]

buffer size = 9782, epsilon = 0.09766
mean_reward :  0.0


  0%|          | 4692/2000001 [45:05<313:15:48,  1.77it/s]

buffer size = 9784, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4693/2000001 [45:05<311:51:30,  1.78it/s]

buffer size = 9786, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4694/2000001 [45:06<313:44:29,  1.77it/s]

buffer size = 9788, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4695/2000001 [45:06<314:08:21,  1.76it/s]

buffer size = 9790, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4696/2000001 [45:07<317:11:09,  1.75it/s]

buffer size = 9792, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4697/2000001 [45:08<354:44:51,  1.56it/s]

buffer size = 9794, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4698/2000001 [45:09<379:45:49,  1.46it/s]

buffer size = 9796, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4699/2000001 [45:09<400:47:24,  1.38it/s]

buffer size = 9798, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4700/2000001 [45:10<416:07:13,  1.33it/s]

buffer size = 9800, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4701/2000001 [45:11<384:20:04,  1.44it/s]

buffer size = 9802, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4702/2000001 [45:11<362:09:38,  1.53it/s]

buffer size = 9804, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4703/2000001 [45:12<345:01:23,  1.61it/s]

buffer size = 9806, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4704/2000001 [45:12<337:52:27,  1.64it/s]

buffer size = 9808, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4705/2000001 [45:13<329:10:21,  1.68it/s]

buffer size = 9810, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4706/2000001 [45:14<325:11:36,  1.70it/s]

buffer size = 9812, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4707/2000001 [45:14<326:22:19,  1.70it/s]

buffer size = 9814, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4708/2000001 [45:15<322:24:32,  1.72it/s]

buffer size = 9816, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4709/2000001 [45:15<323:00:13,  1.72it/s]

buffer size = 9818, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4710/2000001 [45:16<321:11:07,  1.73it/s]

buffer size = 9820, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4711/2000001 [45:16<322:21:05,  1.72it/s]

buffer size = 9822, epsilon = 0.09765
mean_reward :  0.0


  0%|          | 4712/2000001 [45:17<322:05:41,  1.72it/s]

buffer size = 9824, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4713/2000001 [45:18<322:40:40,  1.72it/s]

buffer size = 9826, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4714/2000001 [45:18<319:20:31,  1.74it/s]

buffer size = 9828, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4715/2000001 [45:19<320:01:00,  1.73it/s]

buffer size = 9830, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4716/2000001 [45:19<319:22:27,  1.74it/s]

buffer size = 9832, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4717/2000001 [45:20<317:54:14,  1.74it/s]

buffer size = 9834, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4718/2000001 [45:21<357:10:37,  1.55it/s]

buffer size = 9836, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4719/2000001 [45:22<400:51:14,  1.38it/s]

buffer size = 9838, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4720/2000001 [45:22<426:18:12,  1.30it/s]

buffer size = 9840, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4721/2000001 [45:23<408:15:16,  1.36it/s]

buffer size = 9842, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4722/2000001 [45:24<380:49:22,  1.46it/s]

buffer size = 9844, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4723/2000001 [45:24<363:00:34,  1.53it/s]

buffer size = 9846, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4724/2000001 [45:25<350:04:22,  1.58it/s]

buffer size = 9848, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4725/2000001 [45:25<338:14:37,  1.64it/s]

buffer size = 9850, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4726/2000001 [45:26<332:13:43,  1.67it/s]

buffer size = 9852, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4727/2000001 [45:27<330:34:50,  1.68it/s]

buffer size = 9854, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4728/2000001 [45:27<327:46:13,  1.69it/s]

buffer size = 9856, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4729/2000001 [45:28<324:06:35,  1.71it/s]

buffer size = 9858, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4730/2000001 [45:28<320:20:45,  1.73it/s]

buffer size = 9860, epsilon = 0.09764
mean_reward :  0.0


  0%|          | 4731/2000001 [45:29<321:23:35,  1.72it/s]

buffer size = 9862, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4732/2000001 [45:29<318:35:18,  1.74it/s]

buffer size = 9864, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4733/2000001 [45:30<320:26:54,  1.73it/s]

buffer size = 9866, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4734/2000001 [45:31<321:08:39,  1.73it/s]

buffer size = 9868, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4735/2000001 [45:31<324:51:40,  1.71it/s]

buffer size = 9870, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4736/2000001 [45:32<322:14:47,  1.72it/s]

buffer size = 9872, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4737/2000001 [45:32<323:11:52,  1.71it/s]

buffer size = 9874, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4738/2000001 [45:33<353:53:05,  1.57it/s]

buffer size = 9876, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4739/2000001 [45:34<375:47:55,  1.47it/s]

buffer size = 9878, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4740/2000001 [45:35<393:30:58,  1.41it/s]

buffer size = 9880, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4741/2000001 [45:36<415:31:57,  1.33it/s]

buffer size = 9882, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4742/2000001 [45:36<402:54:13,  1.38it/s]

buffer size = 9884, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4743/2000001 [45:37<378:59:10,  1.46it/s]

buffer size = 9886, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4744/2000001 [45:37<361:36:31,  1.53it/s]

buffer size = 9888, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4745/2000001 [45:38<347:18:57,  1.60it/s]

buffer size = 9890, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4746/2000001 [45:39<341:41:46,  1.62it/s]

buffer size = 9892, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4747/2000001 [45:39<338:21:58,  1.64it/s]

buffer size = 9894, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4748/2000001 [45:40<336:11:53,  1.65it/s]

buffer size = 9896, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4749/2000001 [45:40<333:22:16,  1.66it/s]

buffer size = 9898, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4750/2000001 [45:41<331:05:02,  1.67it/s]

buffer size = 9900, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4751/2000001 [45:41<331:13:59,  1.67it/s]

buffer size = 9902, epsilon = 0.09763
mean_reward :  0.0


  0%|          | 4752/2000001 [45:42<323:35:25,  1.71it/s]

buffer size = 9904, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4753/2000001 [45:43<324:08:34,  1.71it/s]

buffer size = 9906, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4754/2000001 [45:43<321:42:06,  1.72it/s]

buffer size = 9908, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4755/2000001 [45:44<320:37:08,  1.73it/s]

buffer size = 9910, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4756/2000001 [45:44<316:38:09,  1.75it/s]

buffer size = 9912, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4757/2000001 [45:45<318:47:29,  1.74it/s]

buffer size = 9914, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4758/2000001 [45:45<316:15:57,  1.75it/s]

buffer size = 9916, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4759/2000001 [45:46<331:10:35,  1.67it/s]

buffer size = 9918, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4760/2000001 [45:47<382:36:49,  1.45it/s]

buffer size = 9920, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4761/2000001 [45:48<405:53:51,  1.37it/s]

buffer size = 9922, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4762/2000001 [45:49<413:13:16,  1.34it/s]

buffer size = 9924, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4763/2000001 [45:49<386:27:04,  1.43it/s]

buffer size = 9926, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4764/2000001 [45:50<366:55:21,  1.51it/s]

buffer size = 9928, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4765/2000001 [45:50<352:11:53,  1.57it/s]

buffer size = 9930, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4766/2000001 [45:51<342:32:44,  1.62it/s]

buffer size = 9932, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4767/2000001 [45:52<335:43:52,  1.65it/s]

buffer size = 9934, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4768/2000001 [45:52<331:47:59,  1.67it/s]

buffer size = 9936, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4769/2000001 [45:53<331:07:45,  1.67it/s]

buffer size = 9938, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4770/2000001 [45:53<329:13:23,  1.68it/s]

buffer size = 9940, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4771/2000001 [45:54<325:36:49,  1.70it/s]

buffer size = 9942, epsilon = 0.09762
mean_reward :  0.0


  0%|          | 4772/2000001 [45:54<326:47:59,  1.70it/s]

buffer size = 9944, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4773/2000001 [45:55<328:39:58,  1.69it/s]

buffer size = 9946, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4774/2000001 [45:56<323:58:58,  1.71it/s]

buffer size = 9948, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4775/2000001 [45:56<322:51:37,  1.72it/s]

buffer size = 9950, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4776/2000001 [45:57<320:22:58,  1.73it/s]

buffer size = 9952, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4777/2000001 [45:57<319:02:16,  1.74it/s]

buffer size = 9954, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4778/2000001 [45:58<316:57:42,  1.75it/s]

buffer size = 9956, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4779/2000001 [45:59<319:03:15,  1.74it/s]

buffer size = 9958, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4780/2000001 [45:59<359:37:05,  1.54it/s]

buffer size = 9960, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4781/2000001 [46:00<380:26:53,  1.46it/s]

buffer size = 9962, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4782/2000001 [46:01<399:44:23,  1.39it/s]

buffer size = 9964, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4783/2000001 [46:02<422:12:42,  1.31it/s]

buffer size = 9966, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4784/2000001 [46:02<393:03:03,  1.41it/s]

buffer size = 9968, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4785/2000001 [46:03<372:27:48,  1.49it/s]

buffer size = 9970, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4786/2000001 [46:04<356:19:23,  1.56it/s]

buffer size = 9972, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4787/2000001 [46:04<341:19:49,  1.62it/s]

buffer size = 9974, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4788/2000001 [46:05<335:12:58,  1.65it/s]

buffer size = 9976, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4789/2000001 [46:05<329:38:27,  1.68it/s]

buffer size = 9978, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4790/2000001 [46:06<328:40:01,  1.69it/s]

buffer size = 9980, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4791/2000001 [46:06<324:45:14,  1.71it/s]

buffer size = 9982, epsilon = 0.09761
mean_reward :  0.0


  0%|          | 4792/2000001 [46:07<321:09:52,  1.73it/s]

buffer size = 9984, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4793/2000001 [46:07<319:32:56,  1.73it/s]

buffer size = 9986, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4794/2000001 [46:08<318:03:57,  1.74it/s]

buffer size = 9988, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4795/2000001 [46:09<318:58:17,  1.74it/s]

buffer size = 9990, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4796/2000001 [46:09<322:19:45,  1.72it/s]

buffer size = 9992, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4797/2000001 [46:10<322:47:58,  1.72it/s]

buffer size = 9994, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4798/2000001 [46:10<320:50:11,  1.73it/s]

buffer size = 9996, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4799/2000001 [46:11<318:56:16,  1.74it/s]

buffer size = 9998, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4800/2000001 [46:12<316:26:33,  1.75it/s]

buffer size = 10000, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4801/2000001 [46:12<350:19:54,  1.58it/s]

buffer size = 10002, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4802/2000001 [46:13<385:05:15,  1.44it/s]

buffer size = 10004, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4803/2000001 [46:14<415:26:32,  1.33it/s]

buffer size = 10006, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4804/2000001 [46:15<408:17:12,  1.36it/s]

buffer size = 10008, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4805/2000001 [46:15<383:24:23,  1.45it/s]

buffer size = 10010, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4806/2000001 [46:16<361:00:02,  1.54it/s]

buffer size = 10012, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4807/2000001 [46:16<348:31:55,  1.59it/s]

buffer size = 10014, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4808/2000001 [46:17<337:16:34,  1.64it/s]

buffer size = 10016, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4809/2000001 [46:18<333:12:24,  1.66it/s]

buffer size = 10018, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4810/2000001 [46:18<328:43:50,  1.69it/s]

buffer size = 10020, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4811/2000001 [46:19<324:25:58,  1.71it/s]

buffer size = 10022, epsilon = 0.09760
mean_reward :  0.0


  0%|          | 4812/2000001 [46:19<326:49:56,  1.70it/s]

buffer size = 10024, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4813/2000001 [46:20<326:12:38,  1.70it/s]

buffer size = 10026, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4814/2000001 [46:20<322:06:48,  1.72it/s]

buffer size = 10028, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4815/2000001 [46:21<321:22:50,  1.72it/s]

buffer size = 10030, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4816/2000001 [46:22<323:02:19,  1.72it/s]

buffer size = 10032, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4817/2000001 [46:22<319:37:30,  1.73it/s]

buffer size = 10034, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4818/2000001 [46:23<324:44:20,  1.71it/s]

buffer size = 10036, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4819/2000001 [46:23<322:52:12,  1.72it/s]

buffer size = 10038, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4820/2000001 [46:24<322:04:25,  1.72it/s]

buffer size = 10040, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4821/2000001 [46:25<318:53:58,  1.74it/s]

buffer size = 10042, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4822/2000001 [46:25<356:54:11,  1.55it/s]

buffer size = 10044, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4823/2000001 [46:26<379:51:17,  1.46it/s]

buffer size = 10046, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4824/2000001 [46:27<398:00:55,  1.39it/s]

buffer size = 10048, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4825/2000001 [46:28<416:05:47,  1.33it/s]

buffer size = 10050, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4826/2000001 [46:28<388:36:32,  1.43it/s]

buffer size = 10052, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4827/2000001 [46:29<372:00:13,  1.49it/s]

buffer size = 10054, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4828/2000001 [46:29<351:36:44,  1.58it/s]

buffer size = 10056, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4829/2000001 [46:30<346:48:33,  1.60it/s]

buffer size = 10058, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4830/2000001 [46:31<338:07:44,  1.64it/s]

buffer size = 10060, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4831/2000001 [46:31<335:15:57,  1.65it/s]

buffer size = 10062, epsilon = 0.09759
mean_reward :  0.0


  0%|          | 4832/2000001 [46:32<332:51:38,  1.67it/s]

buffer size = 10064, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4833/2000001 [46:32<328:18:03,  1.69it/s]

buffer size = 10066, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4834/2000001 [46:33<326:33:16,  1.70it/s]

buffer size = 10068, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4835/2000001 [46:34<321:59:47,  1.72it/s]

buffer size = 10070, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4836/2000001 [46:34<323:12:42,  1.71it/s]

buffer size = 10072, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4837/2000001 [46:35<324:02:16,  1.71it/s]

buffer size = 10074, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4838/2000001 [46:35<322:42:21,  1.72it/s]

buffer size = 10076, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4839/2000001 [46:36<321:15:04,  1.73it/s]

buffer size = 10078, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4840/2000001 [46:36<322:42:51,  1.72it/s]

buffer size = 10080, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4841/2000001 [46:37<325:37:45,  1.70it/s]

buffer size = 10082, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4842/2000001 [46:38<324:25:47,  1.71it/s]

buffer size = 10084, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4843/2000001 [46:38<357:46:16,  1.55it/s]

buffer size = 10086, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4844/2000001 [46:39<384:30:57,  1.44it/s]

buffer size = 10088, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4845/2000001 [46:40<400:15:11,  1.38it/s]

buffer size = 10090, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4846/2000001 [46:41<423:57:53,  1.31it/s]

buffer size = 10092, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4847/2000001 [46:41<392:58:10,  1.41it/s]

buffer size = 10094, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4848/2000001 [46:42<372:36:38,  1.49it/s]

buffer size = 10096, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4849/2000001 [46:43<358:45:37,  1.54it/s]

buffer size = 10098, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4850/2000001 [46:43<344:01:56,  1.61it/s]

buffer size = 10100, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4851/2000001 [46:44<336:55:46,  1.64it/s]

buffer size = 10102, epsilon = 0.09758
mean_reward :  0.0


  0%|          | 4852/2000001 [46:44<329:48:19,  1.68it/s]

buffer size = 10104, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4853/2000001 [46:45<327:11:50,  1.69it/s]

buffer size = 10106, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4854/2000001 [46:46<325:09:22,  1.70it/s]

buffer size = 10108, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4855/2000001 [46:46<323:11:33,  1.71it/s]

buffer size = 10110, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4856/2000001 [46:47<321:53:23,  1.72it/s]

buffer size = 10112, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4857/2000001 [46:47<318:08:13,  1.74it/s]

buffer size = 10114, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4858/2000001 [46:48<317:35:00,  1.75it/s]

buffer size = 10116, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4859/2000001 [46:48<316:29:13,  1.75it/s]

buffer size = 10118, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4860/2000001 [46:49<315:15:48,  1.76it/s]

buffer size = 10120, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4861/2000001 [46:49<316:53:35,  1.75it/s]

buffer size = 10122, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4862/2000001 [46:50<316:50:23,  1.75it/s]

buffer size = 10124, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4863/2000001 [46:51<317:45:21,  1.74it/s]

buffer size = 10126, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4864/2000001 [46:51<354:43:26,  1.56it/s]

buffer size = 10128, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4865/2000001 [46:52<393:54:32,  1.41it/s]

buffer size = 10130, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4866/2000001 [46:53<421:57:24,  1.31it/s]

buffer size = 10132, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4867/2000001 [46:54<415:12:07,  1.33it/s]

buffer size = 10134, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4868/2000001 [46:54<385:16:11,  1.44it/s]

buffer size = 10136, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4869/2000001 [46:55<364:33:31,  1.52it/s]

buffer size = 10138, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4870/2000001 [46:56<347:51:35,  1.59it/s]

buffer size = 10140, epsilon = 0.09757
mean_reward :  0.0


  0%|          | 4871/2000001 [46:56<341:01:15,  1.63it/s]

buffer size = 10142, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4872/2000001 [46:57<332:20:56,  1.67it/s]

buffer size = 10144, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4873/2000001 [46:57<329:27:46,  1.68it/s]

buffer size = 10146, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4874/2000001 [46:58<324:56:57,  1.71it/s]

buffer size = 10148, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4875/2000001 [46:58<324:05:07,  1.71it/s]

buffer size = 10150, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4876/2000001 [46:59<322:46:51,  1.72it/s]

buffer size = 10152, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4877/2000001 [47:00<325:06:32,  1.70it/s]

buffer size = 10154, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4878/2000001 [47:00<322:01:13,  1.72it/s]

buffer size = 10156, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4879/2000001 [47:01<319:01:18,  1.74it/s]

buffer size = 10158, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4880/2000001 [47:01<320:18:16,  1.73it/s]

buffer size = 10160, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4881/2000001 [47:02<319:08:32,  1.74it/s]

buffer size = 10162, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4882/2000001 [47:03<322:14:19,  1.72it/s]

buffer size = 10164, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4883/2000001 [47:03<321:43:33,  1.72it/s]

buffer size = 10166, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4884/2000001 [47:04<320:42:10,  1.73it/s]

buffer size = 10168, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4885/2000001 [47:05<365:10:03,  1.52it/s]

buffer size = 10170, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4886/2000001 [47:05<407:30:15,  1.36it/s]

buffer size = 10172, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4887/2000001 [47:06<432:36:06,  1.28it/s]

buffer size = 10174, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4888/2000001 [47:07<409:50:37,  1.35it/s]

buffer size = 10176, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4889/2000001 [47:08<381:10:02,  1.45it/s]

buffer size = 10178, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4890/2000001 [47:08<362:39:20,  1.53it/s]

buffer size = 10180, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4891/2000001 [47:09<352:51:46,  1.57it/s]

buffer size = 10182, epsilon = 0.09756
mean_reward :  0.0


  0%|          | 4892/2000001 [47:09<345:25:20,  1.60it/s]

buffer size = 10184, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4893/2000001 [47:10<341:03:03,  1.62it/s]

buffer size = 10186, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4894/2000001 [47:10<334:32:50,  1.66it/s]

buffer size = 10188, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4895/2000001 [47:11<329:30:49,  1.68it/s]

buffer size = 10190, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4896/2000001 [47:12<326:00:05,  1.70it/s]

buffer size = 10192, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4897/2000001 [47:12<323:52:00,  1.71it/s]

buffer size = 10194, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4898/2000001 [47:13<324:54:05,  1.71it/s]

buffer size = 10196, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4899/2000001 [47:13<323:18:15,  1.71it/s]

buffer size = 10198, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4900/2000001 [47:14<322:53:13,  1.72it/s]

buffer size = 10200, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4901/2000001 [47:15<320:22:09,  1.73it/s]

buffer size = 10202, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4902/2000001 [47:15<320:59:21,  1.73it/s]

buffer size = 10204, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4903/2000001 [47:16<320:44:26,  1.73it/s]

buffer size = 10206, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4904/2000001 [47:16<322:27:57,  1.72it/s]

buffer size = 10208, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4905/2000001 [47:17<344:32:53,  1.61it/s]

buffer size = 10210, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4906/2000001 [47:18<371:51:14,  1.49it/s]

buffer size = 10212, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4907/2000001 [47:19<392:23:37,  1.41it/s]

buffer size = 10214, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4908/2000001 [47:19<412:28:33,  1.34it/s]

buffer size = 10216, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4909/2000001 [47:20<397:02:11,  1.40it/s]

buffer size = 10218, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4910/2000001 [47:21<373:04:18,  1.49it/s]

buffer size = 10220, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4911/2000001 [47:21<356:41:29,  1.55it/s]

buffer size = 10222, epsilon = 0.09755
mean_reward :  0.0


  0%|          | 4912/2000001 [47:22<347:06:11,  1.60it/s]

buffer size = 10224, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4913/2000001 [47:22<338:45:16,  1.64it/s]

buffer size = 10226, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4914/2000001 [47:23<337:37:19,  1.64it/s]

buffer size = 10228, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4915/2000001 [47:24<334:53:07,  1.65it/s]

buffer size = 10230, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4916/2000001 [47:24<330:38:59,  1.68it/s]

buffer size = 10232, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4917/2000001 [47:25<328:17:55,  1.69it/s]

buffer size = 10234, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4918/2000001 [47:25<326:20:57,  1.70it/s]

buffer size = 10236, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4919/2000001 [47:26<327:23:20,  1.69it/s]

buffer size = 10238, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4920/2000001 [47:26<321:44:25,  1.72it/s]

buffer size = 10240, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4921/2000001 [47:27<322:12:23,  1.72it/s]

buffer size = 10242, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4922/2000001 [47:28<322:17:00,  1.72it/s]

buffer size = 10244, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4923/2000001 [47:28<322:28:15,  1.72it/s]

buffer size = 10246, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4924/2000001 [47:29<324:54:17,  1.71it/s]

buffer size = 10248, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4925/2000001 [47:29<321:03:38,  1.73it/s]

buffer size = 10250, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4926/2000001 [47:30<329:24:43,  1.68it/s]

buffer size = 10252, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4927/2000001 [47:31<374:14:43,  1.48it/s]

buffer size = 10254, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4928/2000001 [47:32<392:51:10,  1.41it/s]

buffer size = 10256, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4929/2000001 [47:32<408:39:46,  1.36it/s]

buffer size = 10258, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4930/2000001 [47:33<404:47:15,  1.37it/s]

buffer size = 10260, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4931/2000001 [47:34<378:36:00,  1.46it/s]

buffer size = 10262, epsilon = 0.09754
mean_reward :  0.0


  0%|          | 4932/2000001 [47:34<360:10:41,  1.54it/s]

buffer size = 10264, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4933/2000001 [47:35<347:00:52,  1.60it/s]

buffer size = 10266, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4934/2000001 [47:35<337:59:07,  1.64it/s]

buffer size = 10268, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4935/2000001 [47:36<332:49:07,  1.67it/s]

buffer size = 10270, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4936/2000001 [47:37<330:41:07,  1.68it/s]

buffer size = 10272, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4937/2000001 [47:37<326:43:19,  1.70it/s]

buffer size = 10274, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4938/2000001 [47:38<321:48:26,  1.72it/s]

buffer size = 10276, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4939/2000001 [47:38<324:05:20,  1.71it/s]

buffer size = 10278, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4940/2000001 [47:39<320:45:21,  1.73it/s]

buffer size = 10280, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4941/2000001 [47:39<322:30:51,  1.72it/s]

buffer size = 10282, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4942/2000001 [47:40<320:06:07,  1.73it/s]

buffer size = 10284, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4943/2000001 [47:41<321:11:49,  1.73it/s]

buffer size = 10286, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4944/2000001 [47:41<319:53:35,  1.73it/s]

buffer size = 10288, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4945/2000001 [47:42<317:40:11,  1.74it/s]

buffer size = 10290, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4946/2000001 [47:42<318:35:15,  1.74it/s]

buffer size = 10292, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4947/2000001 [47:43<316:08:18,  1.75it/s]

buffer size = 10294, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4948/2000001 [47:44<351:09:23,  1.58it/s]

buffer size = 10296, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4949/2000001 [47:44<377:30:52,  1.47it/s]

buffer size = 10298, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4950/2000001 [47:45<397:40:50,  1.39it/s]

buffer size = 10300, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4951/2000001 [47:46<418:42:47,  1.32it/s]

buffer size = 10302, epsilon = 0.09753
mean_reward :  0.0


  0%|          | 4952/2000001 [47:47<392:37:30,  1.41it/s]

buffer size = 10304, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4953/2000001 [47:47<370:24:40,  1.50it/s]

buffer size = 10306, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4954/2000001 [47:48<356:32:55,  1.55it/s]

buffer size = 10308, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4955/2000001 [47:48<344:02:46,  1.61it/s]

buffer size = 10310, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4956/2000001 [47:49<339:34:17,  1.63it/s]

buffer size = 10312, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4957/2000001 [47:50<337:37:29,  1.64it/s]

buffer size = 10314, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4958/2000001 [47:50<329:32:49,  1.68it/s]

buffer size = 10316, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4959/2000001 [47:51<328:34:35,  1.69it/s]

buffer size = 10318, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4960/2000001 [47:51<325:54:51,  1.70it/s]

buffer size = 10320, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4961/2000001 [47:52<325:35:05,  1.70it/s]

buffer size = 10322, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4962/2000001 [47:53<323:54:26,  1.71it/s]

buffer size = 10324, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4963/2000001 [47:53<326:16:41,  1.70it/s]

buffer size = 10326, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4964/2000001 [47:54<321:48:09,  1.72it/s]

buffer size = 10328, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4965/2000001 [47:54<320:50:05,  1.73it/s]

buffer size = 10330, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4966/2000001 [47:55<322:29:38,  1.72it/s]

buffer size = 10332, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4967/2000001 [47:55<320:33:39,  1.73it/s]

buffer size = 10334, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4968/2000001 [47:56<321:23:56,  1.72it/s]

buffer size = 10336, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4969/2000001 [47:57<357:12:05,  1.55it/s]

buffer size = 10338, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4970/2000001 [47:58<401:45:09,  1.38it/s]

buffer size = 10340, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4971/2000001 [47:59<427:42:14,  1.30it/s]

buffer size = 10342, epsilon = 0.09752
mean_reward :  0.0


  0%|          | 4972/2000001 [47:59<410:30:05,  1.35it/s]

buffer size = 10344, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4973/2000001 [48:00<380:26:10,  1.46it/s]

buffer size = 10346, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4974/2000001 [48:00<363:58:30,  1.52it/s]

buffer size = 10348, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4975/2000001 [48:01<348:55:08,  1.59it/s]

buffer size = 10350, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4976/2000001 [48:02<342:46:15,  1.62it/s]

buffer size = 10352, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4977/2000001 [48:02<334:45:54,  1.66it/s]

buffer size = 10354, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4978/2000001 [48:03<331:53:39,  1.67it/s]

buffer size = 10356, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4979/2000001 [48:03<330:23:46,  1.68it/s]

buffer size = 10358, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4980/2000001 [48:04<328:48:53,  1.69it/s]

buffer size = 10360, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4981/2000001 [48:05<328:27:55,  1.69it/s]

buffer size = 10362, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4982/2000001 [48:05<323:22:21,  1.71it/s]

buffer size = 10364, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4983/2000001 [48:06<323:33:18,  1.71it/s]

buffer size = 10366, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4984/2000001 [48:06<320:07:11,  1.73it/s]

buffer size = 10368, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4985/2000001 [48:07<322:47:32,  1.72it/s]

buffer size = 10370, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4986/2000001 [48:07<320:47:37,  1.73it/s]

buffer size = 10372, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4987/2000001 [48:08<321:45:18,  1.72it/s]

buffer size = 10374, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4988/2000001 [48:09<320:48:30,  1.73it/s]

buffer size = 10376, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4989/2000001 [48:09<337:14:14,  1.64it/s]

buffer size = 10378, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4990/2000001 [48:10<381:54:41,  1.45it/s]

buffer size = 10380, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4991/2000001 [48:11<402:26:18,  1.38it/s]

buffer size = 10382, epsilon = 0.09751
mean_reward :  0.0


  0%|          | 4992/2000001 [48:12<422:52:49,  1.31it/s]

buffer size = 10384, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 4993/2000001 [48:12<403:29:49,  1.37it/s]

buffer size = 10386, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 4994/2000001 [48:13<380:50:32,  1.46it/s]

buffer size = 10388, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 4995/2000001 [48:14<363:13:44,  1.53it/s]

buffer size = 10390, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 4996/2000001 [48:14<354:18:04,  1.56it/s]

buffer size = 10392, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 4997/2000001 [48:15<343:52:09,  1.61it/s]

buffer size = 10394, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 4998/2000001 [48:15<335:53:30,  1.65it/s]

buffer size = 10396, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 4999/2000001 [48:16<330:41:58,  1.68it/s]

buffer size = 10398, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5000/2000001 [48:16<327:20:02,  1.69it/s]

buffer size = 10400, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5001/2000001 [48:17<326:01:00,  1.70it/s]

buffer size = 10402, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5002/2000001 [48:18<323:44:19,  1.71it/s]

buffer size = 10404, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5003/2000001 [48:18<324:03:47,  1.71it/s]

buffer size = 10406, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5004/2000001 [48:19<321:05:25,  1.73it/s]

buffer size = 10408, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5005/2000001 [48:19<325:38:25,  1.70it/s]

buffer size = 10410, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5006/2000001 [48:20<325:05:59,  1.70it/s]

buffer size = 10412, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5007/2000001 [48:21<326:15:57,  1.70it/s]

buffer size = 10414, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5008/2000001 [48:21<322:34:12,  1.72it/s]

buffer size = 10416, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5009/2000001 [48:22<323:55:43,  1.71it/s]

buffer size = 10418, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5010/2000001 [48:22<347:07:55,  1.60it/s]

buffer size = 10420, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5011/2000001 [48:23<373:43:38,  1.48it/s]

buffer size = 10422, epsilon = 0.09750
mean_reward :  0.0


  0%|          | 5012/2000001 [48:24<404:42:48,  1.37it/s]

buffer size = 10424, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5013/2000001 [48:25<428:47:52,  1.29it/s]

buffer size = 10426, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5014/2000001 [48:26<403:24:07,  1.37it/s]

buffer size = 10428, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5015/2000001 [48:26<378:57:54,  1.46it/s]

buffer size = 10430, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5016/2000001 [48:27<361:35:37,  1.53it/s]

buffer size = 10432, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5017/2000001 [48:27<349:15:52,  1.59it/s]

buffer size = 10434, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5018/2000001 [48:28<345:11:45,  1.61it/s]

buffer size = 10436, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5019/2000001 [48:29<335:54:08,  1.65it/s]

buffer size = 10438, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5020/2000001 [48:29<334:13:28,  1.66it/s]

buffer size = 10440, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5021/2000001 [48:30<328:23:59,  1.69it/s]

buffer size = 10442, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5022/2000001 [48:30<326:52:56,  1.70it/s]

buffer size = 10444, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5023/2000001 [48:31<325:05:14,  1.70it/s]

buffer size = 10446, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5024/2000001 [48:31<323:11:21,  1.71it/s]

buffer size = 10448, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5025/2000001 [48:32<323:50:19,  1.71it/s]

buffer size = 10450, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5026/2000001 [48:33<323:25:53,  1.71it/s]

buffer size = 10452, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5027/2000001 [48:33<326:31:19,  1.70it/s]

buffer size = 10454, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5028/2000001 [48:34<326:46:02,  1.70it/s]

buffer size = 10456, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5029/2000001 [48:34<330:36:45,  1.68it/s]

buffer size = 10458, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5030/2000001 [48:35<329:55:18,  1.68it/s]

buffer size = 10460, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5031/2000001 [48:36<362:48:42,  1.53it/s]

buffer size = 10462, epsilon = 0.09749
mean_reward :  0.0


  0%|          | 5032/2000001 [48:37<401:38:52,  1.38it/s]

buffer size = 10464, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5033/2000001 [48:38<425:25:10,  1.30it/s]

buffer size = 10466, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5034/2000001 [48:38<423:49:27,  1.31it/s]

buffer size = 10468, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5035/2000001 [48:39<393:08:12,  1.41it/s]

buffer size = 10470, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5036/2000001 [48:39<373:01:38,  1.49it/s]

buffer size = 10472, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5037/2000001 [48:40<357:29:30,  1.55it/s]

buffer size = 10474, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5038/2000001 [48:41<350:11:07,  1.58it/s]

buffer size = 10476, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5039/2000001 [48:41<339:14:19,  1.63it/s]

buffer size = 10478, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5040/2000001 [48:42<338:55:17,  1.64it/s]

buffer size = 10480, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5041/2000001 [48:42<335:10:55,  1.65it/s]

buffer size = 10482, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5042/2000001 [48:43<330:33:48,  1.68it/s]

buffer size = 10484, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5043/2000001 [48:44<330:17:31,  1.68it/s]

buffer size = 10486, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5044/2000001 [48:44<328:38:25,  1.69it/s]

buffer size = 10488, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5045/2000001 [48:45<327:47:58,  1.69it/s]

buffer size = 10490, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5046/2000001 [48:45<325:04:09,  1.70it/s]

buffer size = 10492, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5047/2000001 [48:46<326:24:15,  1.70it/s]

buffer size = 10494, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5048/2000001 [48:47<324:21:16,  1.71it/s]

buffer size = 10496, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5049/2000001 [48:47<325:12:32,  1.70it/s]

buffer size = 10498, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5050/2000001 [48:48<327:27:43,  1.69it/s]

buffer size = 10500, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5051/2000001 [48:48<328:51:27,  1.69it/s]

buffer size = 10502, epsilon = 0.09748
mean_reward :  0.0


  0%|          | 5052/2000001 [48:49<371:44:13,  1.49it/s]

buffer size = 10504, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5053/2000001 [48:50<402:16:07,  1.38it/s]

buffer size = 10506, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5054/2000001 [48:51<423:46:03,  1.31it/s]

buffer size = 10508, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5055/2000001 [48:52<408:02:12,  1.36it/s]

buffer size = 10510, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5056/2000001 [48:52<383:17:53,  1.45it/s]

buffer size = 10512, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5057/2000001 [48:53<363:31:30,  1.52it/s]

buffer size = 10514, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5058/2000001 [48:53<353:29:11,  1.57it/s]

buffer size = 10516, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5059/2000001 [48:54<343:52:27,  1.61it/s]

buffer size = 10518, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5060/2000001 [48:54<344:02:34,  1.61it/s]

buffer size = 10520, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5061/2000001 [48:55<338:01:43,  1.64it/s]

buffer size = 10522, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5062/2000001 [48:56<333:29:39,  1.66it/s]

buffer size = 10524, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5063/2000001 [48:56<329:31:40,  1.68it/s]

buffer size = 10526, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5064/2000001 [48:57<326:20:08,  1.70it/s]

buffer size = 10528, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5065/2000001 [48:57<323:39:27,  1.71it/s]

buffer size = 10530, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5066/2000001 [48:58<322:54:41,  1.72it/s]

buffer size = 10532, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5067/2000001 [48:59<327:24:12,  1.69it/s]

buffer size = 10534, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5068/2000001 [48:59<324:44:31,  1.71it/s]

buffer size = 10536, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5069/2000001 [49:00<326:13:46,  1.70it/s]

buffer size = 10538, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5070/2000001 [49:00<324:01:27,  1.71it/s]

buffer size = 10540, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5071/2000001 [49:01<323:22:55,  1.71it/s]

buffer size = 10542, epsilon = 0.09747
mean_reward :  0.0


  0%|          | 5072/2000001 [49:02<342:14:17,  1.62it/s]

buffer size = 10544, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5073/2000001 [49:02<372:40:46,  1.49it/s]

buffer size = 10546, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5074/2000001 [49:03<396:31:53,  1.40it/s]

buffer size = 10548, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5075/2000001 [49:04<417:02:53,  1.33it/s]

buffer size = 10550, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5076/2000001 [49:05<408:57:34,  1.36it/s]

buffer size = 10552, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5077/2000001 [49:05<381:59:47,  1.45it/s]

buffer size = 10554, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5078/2000001 [49:06<367:10:00,  1.51it/s]

buffer size = 10556, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5079/2000001 [49:07<353:42:59,  1.57it/s]

buffer size = 10558, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5080/2000001 [49:07<345:48:46,  1.60it/s]

buffer size = 10560, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5081/2000001 [49:08<337:59:47,  1.64it/s]

buffer size = 10562, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5082/2000001 [49:08<332:32:16,  1.67it/s]

buffer size = 10564, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5083/2000001 [49:09<328:15:08,  1.69it/s]

buffer size = 10566, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5084/2000001 [49:09<325:05:57,  1.70it/s]

buffer size = 10568, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5085/2000001 [49:10<326:05:05,  1.70it/s]

buffer size = 10570, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5086/2000001 [49:11<321:39:10,  1.72it/s]

buffer size = 10572, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5087/2000001 [49:11<321:24:34,  1.72it/s]

buffer size = 10574, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5088/2000001 [49:12<322:54:01,  1.72it/s]

buffer size = 10576, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5089/2000001 [49:12<324:58:10,  1.71it/s]

buffer size = 10578, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5090/2000001 [49:13<323:46:22,  1.71it/s]

buffer size = 10580, epsilon = 0.09746
mean_reward :  0.0


  0%|          | 5091/2000001 [49:13<325:43:14,  1.70it/s]

buffer size = 10582, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5092/2000001 [49:14<329:59:15,  1.68it/s]

buffer size = 10584, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5093/2000001 [49:15<331:43:07,  1.67it/s]

buffer size = 10586, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5094/2000001 [49:16<373:34:27,  1.48it/s]

buffer size = 10588, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5095/2000001 [49:16<400:59:53,  1.38it/s]

buffer size = 10590, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5096/2000001 [49:17<421:59:16,  1.31it/s]

buffer size = 10592, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5097/2000001 [49:18<410:34:48,  1.35it/s]

buffer size = 10594, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5098/2000001 [49:19<385:05:51,  1.44it/s]

buffer size = 10596, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5099/2000001 [49:19<364:52:17,  1.52it/s]

buffer size = 10598, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5100/2000001 [49:20<357:44:11,  1.55it/s]

buffer size = 10600, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5101/2000001 [49:20<346:44:42,  1.60it/s]

buffer size = 10602, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5102/2000001 [49:21<341:41:05,  1.62it/s]

buffer size = 10604, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5103/2000001 [49:21<336:39:39,  1.65it/s]

buffer size = 10606, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5104/2000001 [49:22<331:08:22,  1.67it/s]

buffer size = 10608, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5105/2000001 [49:23<330:42:55,  1.68it/s]

buffer size = 10610, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5106/2000001 [49:23<332:21:52,  1.67it/s]

buffer size = 10612, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5107/2000001 [49:24<328:50:05,  1.69it/s]

buffer size = 10614, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5108/2000001 [49:24<326:12:51,  1.70it/s]

buffer size = 10616, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5109/2000001 [49:25<325:38:32,  1.70it/s]

buffer size = 10618, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5110/2000001 [49:26<323:31:10,  1.71it/s]

buffer size = 10620, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5111/2000001 [49:26<323:37:53,  1.71it/s]

buffer size = 10622, epsilon = 0.09745
mean_reward :  0.0


  0%|          | 5112/2000001 [49:27<330:56:06,  1.67it/s]

buffer size = 10624, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5113/2000001 [49:27<344:28:01,  1.61it/s]

buffer size = 10626, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5114/2000001 [49:28<371:10:04,  1.49it/s]

buffer size = 10628, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5115/2000001 [49:29<394:12:05,  1.41it/s]

buffer size = 10630, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5116/2000001 [49:30<404:14:34,  1.37it/s]

buffer size = 10632, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5117/2000001 [49:31<423:01:23,  1.31it/s]

buffer size = 10634, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5118/2000001 [49:31<414:16:39,  1.34it/s]

buffer size = 10636, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5119/2000001 [49:32<385:04:43,  1.44it/s]

buffer size = 10638, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5120/2000001 [49:33<368:25:59,  1.50it/s]

buffer size = 10640, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5121/2000001 [49:33<352:04:15,  1.57it/s]

buffer size = 10642, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5122/2000001 [49:34<345:00:26,  1.61it/s]

buffer size = 10644, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5123/2000001 [49:34<342:23:34,  1.62it/s]

buffer size = 10646, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5124/2000001 [49:35<337:50:16,  1.64it/s]

buffer size = 10648, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5125/2000001 [49:35<333:14:29,  1.66it/s]

buffer size = 10650, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5126/2000001 [49:36<331:25:16,  1.67it/s]

buffer size = 10652, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5127/2000001 [49:37<330:52:47,  1.67it/s]

buffer size = 10654, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5128/2000001 [49:37<324:35:47,  1.71it/s]

buffer size = 10656, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5129/2000001 [49:38<325:20:13,  1.70it/s]

buffer size = 10658, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5130/2000001 [49:38<323:51:14,  1.71it/s]

buffer size = 10660, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5131/2000001 [49:39<326:12:26,  1.70it/s]

buffer size = 10662, epsilon = 0.09744
mean_reward :  0.0


  0%|          | 5132/2000001 [49:40<327:21:14,  1.69it/s]

buffer size = 10664, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5133/2000001 [49:40<323:58:48,  1.71it/s]

buffer size = 10666, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5134/2000001 [49:41<325:57:40,  1.70it/s]

buffer size = 10668, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5135/2000001 [49:41<346:02:57,  1.60it/s]

buffer size = 10670, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5136/2000001 [49:42<381:46:47,  1.45it/s]

buffer size = 10672, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5137/2000001 [49:43<405:27:06,  1.37it/s]

buffer size = 10674, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5138/2000001 [49:44<428:12:19,  1.29it/s]

buffer size = 10676, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5139/2000001 [49:45<401:27:24,  1.38it/s]

buffer size = 10678, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5140/2000001 [49:45<379:36:52,  1.46it/s]

buffer size = 10680, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5141/2000001 [49:46<359:42:33,  1.54it/s]

buffer size = 10682, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5142/2000001 [49:46<350:24:03,  1.58it/s]

buffer size = 10684, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5143/2000001 [49:47<341:23:36,  1.62it/s]

buffer size = 10686, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5144/2000001 [49:48<335:08:25,  1.65it/s]

buffer size = 10688, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5145/2000001 [49:48<333:41:55,  1.66it/s]

buffer size = 10690, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5146/2000001 [49:49<331:40:06,  1.67it/s]

buffer size = 10692, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5147/2000001 [49:49<331:54:50,  1.67it/s]

buffer size = 10694, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5148/2000001 [49:50<330:36:05,  1.68it/s]

buffer size = 10696, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5149/2000001 [49:50<326:12:35,  1.70it/s]

buffer size = 10698, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5150/2000001 [49:51<327:02:55,  1.69it/s]

buffer size = 10700, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5151/2000001 [49:52<325:54:44,  1.70it/s]

buffer size = 10702, epsilon = 0.09743
mean_reward :  0.0


  0%|          | 5152/2000001 [49:52<326:24:49,  1.70it/s]

buffer size = 10704, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5153/2000001 [49:53<325:16:12,  1.70it/s]

buffer size = 10706, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5154/2000001 [49:53<328:10:52,  1.69it/s]

buffer size = 10708, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5155/2000001 [49:54<324:39:29,  1.71it/s]

buffer size = 10710, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5156/2000001 [49:55<361:27:43,  1.53it/s]

buffer size = 10712, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5157/2000001 [49:56<381:40:04,  1.45it/s]

buffer size = 10714, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5158/2000001 [49:56<393:34:18,  1.41it/s]

buffer size = 10716, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5159/2000001 [49:57<415:02:07,  1.34it/s]

buffer size = 10718, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5160/2000001 [49:58<407:12:35,  1.36it/s]

buffer size = 10720, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5161/2000001 [49:58<381:23:53,  1.45it/s]

buffer size = 10722, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5162/2000001 [49:59<362:00:48,  1.53it/s]

buffer size = 10724, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5163/2000001 [50:00<348:35:45,  1.59it/s]

buffer size = 10726, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5164/2000001 [50:00<343:51:41,  1.61it/s]

buffer size = 10728, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5165/2000001 [50:01<338:18:52,  1.64it/s]

buffer size = 10730, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5166/2000001 [50:01<332:34:54,  1.67it/s]

buffer size = 10732, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5167/2000001 [50:02<328:55:38,  1.68it/s]

buffer size = 10734, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5168/2000001 [50:03<328:49:53,  1.69it/s]

buffer size = 10736, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5169/2000001 [50:03<329:17:48,  1.68it/s]

buffer size = 10738, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5170/2000001 [50:04<327:40:22,  1.69it/s]

buffer size = 10740, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5171/2000001 [50:04<328:46:51,  1.69it/s]

buffer size = 10742, epsilon = 0.09742
mean_reward :  0.0


  0%|          | 5172/2000001 [50:05<326:16:58,  1.70it/s]

buffer size = 10744, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5173/2000001 [50:05<324:26:28,  1.71it/s]

buffer size = 10746, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5174/2000001 [50:06<324:29:57,  1.71it/s]

buffer size = 10748, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5175/2000001 [50:07<326:42:43,  1.70it/s]

buffer size = 10750, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5176/2000001 [50:07<327:18:30,  1.69it/s]

buffer size = 10752, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5177/2000001 [50:08<335:09:39,  1.65it/s]

buffer size = 10754, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5178/2000001 [50:09<381:38:39,  1.45it/s]

buffer size = 10756, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5179/2000001 [50:10<416:24:13,  1.33it/s]

buffer size = 10758, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5180/2000001 [50:11<429:37:31,  1.29it/s]

buffer size = 10760, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5181/2000001 [50:11<397:53:57,  1.39it/s]

buffer size = 10762, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5182/2000001 [50:12<374:44:35,  1.48it/s]

buffer size = 10764, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5183/2000001 [50:12<358:49:03,  1.54it/s]

buffer size = 10766, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5184/2000001 [50:13<351:47:31,  1.58it/s]

buffer size = 10768, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5185/2000001 [50:13<342:34:23,  1.62it/s]

buffer size = 10770, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5186/2000001 [50:14<335:09:25,  1.65it/s]

buffer size = 10772, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5187/2000001 [50:15<333:17:24,  1.66it/s]

buffer size = 10774, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5188/2000001 [50:15<332:02:32,  1.67it/s]

buffer size = 10776, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5189/2000001 [50:16<330:33:21,  1.68it/s]

buffer size = 10778, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5190/2000001 [50:16<328:01:19,  1.69it/s]

buffer size = 10780, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5191/2000001 [50:17<328:38:11,  1.69it/s]

buffer size = 10782, epsilon = 0.09741
mean_reward :  0.0


  0%|          | 5192/2000001 [50:18<328:33:45,  1.69it/s]

buffer size = 10784, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5193/2000001 [50:18<327:07:22,  1.69it/s]

buffer size = 10786, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5194/2000001 [50:19<327:02:26,  1.69it/s]

buffer size = 10788, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5195/2000001 [50:19<326:39:20,  1.70it/s]

buffer size = 10790, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5196/2000001 [50:20<330:45:07,  1.68it/s]

buffer size = 10792, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5197/2000001 [50:21<335:01:06,  1.65it/s]

buffer size = 10794, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5198/2000001 [50:21<374:29:01,  1.48it/s]

buffer size = 10796, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5199/2000001 [50:22<393:54:20,  1.41it/s]

buffer size = 10798, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5200/2000001 [50:23<409:13:55,  1.35it/s]

buffer size = 10800, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5201/2000001 [50:24<424:08:37,  1.31it/s]

buffer size = 10802, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5202/2000001 [50:24<395:43:51,  1.40it/s]

buffer size = 10804, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5203/2000001 [50:25<376:18:52,  1.47it/s]

buffer size = 10806, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5204/2000001 [50:26<358:23:38,  1.55it/s]

buffer size = 10808, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5205/2000001 [50:26<350:43:27,  1.58it/s]

buffer size = 10810, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5206/2000001 [50:27<342:29:41,  1.62it/s]

buffer size = 10812, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5207/2000001 [50:27<339:05:05,  1.63it/s]

buffer size = 10814, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5208/2000001 [50:28<333:49:35,  1.66it/s]

buffer size = 10816, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5209/2000001 [50:29<335:38:25,  1.65it/s]

buffer size = 10818, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5210/2000001 [50:29<336:06:19,  1.65it/s]

buffer size = 10820, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5211/2000001 [50:30<332:04:37,  1.67it/s]

buffer size = 10822, epsilon = 0.09740
mean_reward :  0.0


  0%|          | 5212/2000001 [50:30<334:24:47,  1.66it/s]

buffer size = 10824, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5213/2000001 [50:31<331:33:03,  1.67it/s]

buffer size = 10826, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5214/2000001 [50:32<329:48:27,  1.68it/s]

buffer size = 10828, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5215/2000001 [50:32<329:42:58,  1.68it/s]

buffer size = 10830, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5216/2000001 [50:33<329:06:57,  1.68it/s]

buffer size = 10832, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5217/2000001 [50:33<328:59:49,  1.68it/s]

buffer size = 10834, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5218/2000001 [50:34<341:45:35,  1.62it/s]

buffer size = 10836, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5219/2000001 [50:35<388:17:48,  1.43it/s]

buffer size = 10838, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5220/2000001 [50:36<402:23:30,  1.38it/s]

buffer size = 10840, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5221/2000001 [50:37<427:34:27,  1.30it/s]

buffer size = 10842, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5222/2000001 [50:37<411:16:20,  1.35it/s]

buffer size = 10844, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5223/2000001 [50:38<385:48:45,  1.44it/s]

buffer size = 10846, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5224/2000001 [50:38<365:49:03,  1.51it/s]

buffer size = 10848, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5225/2000001 [50:39<356:52:03,  1.55it/s]

buffer size = 10850, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5226/2000001 [50:40<346:48:59,  1.60it/s]

buffer size = 10852, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5227/2000001 [50:40<340:34:40,  1.63it/s]

buffer size = 10854, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5228/2000001 [50:41<335:44:49,  1.65it/s]

buffer size = 10856, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5229/2000001 [50:41<332:45:40,  1.67it/s]

buffer size = 10858, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5230/2000001 [50:42<331:51:46,  1.67it/s]

buffer size = 10860, epsilon = 0.09739
mean_reward :  0.0


  0%|          | 5231/2000001 [50:43<329:44:38,  1.68it/s]

buffer size = 10862, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5232/2000001 [50:43<331:16:21,  1.67it/s]

buffer size = 10864, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5233/2000001 [50:44<332:09:09,  1.67it/s]

buffer size = 10866, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5234/2000001 [50:44<331:01:25,  1.67it/s]

buffer size = 10868, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5235/2000001 [50:45<332:10:46,  1.67it/s]

buffer size = 10870, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5236/2000001 [50:46<332:28:31,  1.67it/s]

buffer size = 10872, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5237/2000001 [50:46<331:57:34,  1.67it/s]

buffer size = 10874, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5238/2000001 [50:47<330:47:12,  1.68it/s]

buffer size = 10876, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5239/2000001 [50:48<363:13:50,  1.53it/s]

buffer size = 10878, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5240/2000001 [50:48<391:17:36,  1.42it/s]

buffer size = 10880, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5241/2000001 [50:49<421:23:40,  1.31it/s]

buffer size = 10882, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5242/2000001 [50:50<431:41:36,  1.28it/s]

buffer size = 10884, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5243/2000001 [50:51<403:31:33,  1.37it/s]

buffer size = 10886, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5244/2000001 [50:51<380:04:39,  1.46it/s]

buffer size = 10888, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5245/2000001 [50:52<365:09:11,  1.52it/s]

buffer size = 10890, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5246/2000001 [50:52<353:07:23,  1.57it/s]

buffer size = 10892, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5247/2000001 [50:53<344:36:45,  1.61it/s]

buffer size = 10894, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5248/2000001 [50:54<345:01:34,  1.61it/s]

buffer size = 10896, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5249/2000001 [50:54<338:32:28,  1.64it/s]

buffer size = 10898, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5250/2000001 [50:55<337:18:37,  1.64it/s]

buffer size = 10900, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5251/2000001 [50:55<330:49:12,  1.67it/s]

buffer size = 10902, epsilon = 0.09738
mean_reward :  0.0


  0%|          | 5252/2000001 [50:56<331:39:25,  1.67it/s]

buffer size = 10904, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5253/2000001 [50:57<326:32:05,  1.70it/s]

buffer size = 10906, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5254/2000001 [50:57<326:52:21,  1.70it/s]

buffer size = 10908, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5255/2000001 [50:58<325:50:08,  1.70it/s]

buffer size = 10910, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5256/2000001 [50:58<325:02:20,  1.70it/s]

buffer size = 10912, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5257/2000001 [50:59<328:03:42,  1.69it/s]

buffer size = 10914, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5258/2000001 [51:00<327:49:06,  1.69it/s]

buffer size = 10916, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5259/2000001 [51:00<345:30:27,  1.60it/s]

buffer size = 10918, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5260/2000001 [51:01<379:20:56,  1.46it/s]

buffer size = 10920, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5261/2000001 [51:02<402:40:53,  1.38it/s]

buffer size = 10922, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5262/2000001 [51:03<431:52:59,  1.28it/s]

buffer size = 10924, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5263/2000001 [51:03<410:38:05,  1.35it/s]

buffer size = 10926, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5264/2000001 [51:04<385:22:21,  1.44it/s]

buffer size = 10928, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5265/2000001 [51:05<367:15:28,  1.51it/s]

buffer size = 10930, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5266/2000001 [51:05<357:25:14,  1.55it/s]

buffer size = 10932, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5267/2000001 [51:06<347:50:16,  1.59it/s]

buffer size = 10934, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5268/2000001 [51:06<344:30:06,  1.61it/s]

buffer size = 10936, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5269/2000001 [51:07<338:06:39,  1.64it/s]

buffer size = 10938, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5270/2000001 [51:08<335:15:18,  1.65it/s]

buffer size = 10940, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5271/2000001 [51:08<331:44:36,  1.67it/s]

buffer size = 10942, epsilon = 0.09737
mean_reward :  0.0


  0%|          | 5272/2000001 [51:09<328:04:16,  1.69it/s]

buffer size = 10944, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5273/2000001 [51:09<330:56:59,  1.67it/s]

buffer size = 10946, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5274/2000001 [51:10<329:53:14,  1.68it/s]

buffer size = 10948, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5275/2000001 [51:11<327:37:12,  1.69it/s]

buffer size = 10950, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5276/2000001 [51:11<327:48:36,  1.69it/s]

buffer size = 10952, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5277/2000001 [51:12<327:20:46,  1.69it/s]

buffer size = 10954, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5278/2000001 [51:12<327:28:47,  1.69it/s]

buffer size = 10956, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5279/2000001 [51:13<330:19:02,  1.68it/s]

buffer size = 10958, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5280/2000001 [51:14<364:23:39,  1.52it/s]

buffer size = 10960, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5281/2000001 [51:14<380:20:44,  1.46it/s]

buffer size = 10962, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5282/2000001 [51:15<396:36:49,  1.40it/s]

buffer size = 10964, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5283/2000001 [51:16<417:05:08,  1.33it/s]

buffer size = 10966, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5284/2000001 [51:17<407:39:05,  1.36it/s]

buffer size = 10968, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5285/2000001 [51:17<384:03:24,  1.44it/s]

buffer size = 10970, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5286/2000001 [51:18<366:53:55,  1.51it/s]

buffer size = 10972, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5287/2000001 [51:19<351:47:08,  1.58it/s]

buffer size = 10974, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5288/2000001 [51:19<347:22:35,  1.60it/s]

buffer size = 10976, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5289/2000001 [51:20<344:43:41,  1.61it/s]

buffer size = 10978, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5290/2000001 [51:20<337:55:18,  1.64it/s]

buffer size = 10980, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5291/2000001 [51:21<337:46:01,  1.64it/s]

buffer size = 10982, epsilon = 0.09736
mean_reward :  0.0


  0%|          | 5292/2000001 [51:22<335:05:24,  1.65it/s]

buffer size = 10984, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5293/2000001 [51:22<333:37:13,  1.66it/s]

buffer size = 10986, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5294/2000001 [51:23<329:47:44,  1.68it/s]

buffer size = 10988, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5295/2000001 [51:23<332:49:09,  1.66it/s]

buffer size = 10990, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5296/2000001 [51:24<332:09:20,  1.67it/s]

buffer size = 10992, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5297/2000001 [51:25<331:14:00,  1.67it/s]

buffer size = 10994, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5298/2000001 [51:25<332:05:47,  1.67it/s]

buffer size = 10996, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5299/2000001 [51:26<328:02:36,  1.69it/s]

buffer size = 10998, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5300/2000001 [51:26<328:53:20,  1.68it/s]

buffer size = 11000, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5301/2000001 [51:27<358:38:36,  1.54it/s]

buffer size = 11002, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5302/2000001 [51:28<382:40:11,  1.45it/s]

buffer size = 11004, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5303/2000001 [51:29<401:50:09,  1.38it/s]

buffer size = 11006, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5304/2000001 [51:29<420:29:20,  1.32it/s]

buffer size = 11008, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5305/2000001 [51:30<409:34:35,  1.35it/s]

buffer size = 11010, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5306/2000001 [51:31<385:22:40,  1.44it/s]

buffer size = 11012, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5307/2000001 [51:31<366:52:21,  1.51it/s]

buffer size = 11014, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5308/2000001 [51:32<356:58:04,  1.55it/s]

buffer size = 11016, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5309/2000001 [51:33<350:22:56,  1.58it/s]

buffer size = 11018, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5310/2000001 [51:33<343:38:16,  1.61it/s]

buffer size = 11020, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5311/2000001 [51:34<340:47:22,  1.63it/s]

buffer size = 11022, epsilon = 0.09735
mean_reward :  0.0


  0%|          | 5312/2000001 [51:34<340:50:18,  1.63it/s]

buffer size = 11024, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5313/2000001 [51:35<339:41:47,  1.63it/s]

buffer size = 11026, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5314/2000001 [51:36<335:38:09,  1.65it/s]

buffer size = 11028, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5315/2000001 [51:36<334:18:04,  1.66it/s]

buffer size = 11030, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5316/2000001 [51:37<334:10:23,  1.66it/s]

buffer size = 11032, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5317/2000001 [51:37<331:51:52,  1.67it/s]

buffer size = 11034, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5318/2000001 [51:38<333:12:10,  1.66it/s]

buffer size = 11036, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5319/2000001 [51:39<330:05:18,  1.68it/s]

buffer size = 11038, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5320/2000001 [51:39<330:37:12,  1.68it/s]

buffer size = 11040, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5321/2000001 [51:40<330:54:31,  1.67it/s]

buffer size = 11042, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5322/2000001 [51:41<361:30:22,  1.53it/s]

buffer size = 11044, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5323/2000001 [51:41<385:40:42,  1.44it/s]

buffer size = 11046, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5324/2000001 [51:42<402:06:24,  1.38it/s]

buffer size = 11048, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5325/2000001 [51:43<428:56:48,  1.29it/s]

buffer size = 11050, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5326/2000001 [51:44<406:38:42,  1.36it/s]

buffer size = 11052, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5327/2000001 [51:44<381:47:22,  1.45it/s]

buffer size = 11054, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5328/2000001 [51:45<365:49:46,  1.51it/s]

buffer size = 11056, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5329/2000001 [51:45<356:42:10,  1.55it/s]

buffer size = 11058, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5330/2000001 [51:46<346:44:57,  1.60it/s]

buffer size = 11060, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5331/2000001 [51:47<341:31:13,  1.62it/s]

buffer size = 11062, epsilon = 0.09734
mean_reward :  0.0


  0%|          | 5332/2000001 [51:47<334:10:14,  1.66it/s]

buffer size = 11064, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5333/2000001 [51:48<330:39:42,  1.68it/s]

buffer size = 11066, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5334/2000001 [51:48<329:48:42,  1.68it/s]

buffer size = 11068, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5335/2000001 [51:49<328:56:01,  1.68it/s]

buffer size = 11070, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5336/2000001 [51:50<332:01:03,  1.67it/s]

buffer size = 11072, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5337/2000001 [51:50<328:31:44,  1.69it/s]

buffer size = 11074, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5338/2000001 [51:51<333:08:30,  1.66it/s]

buffer size = 11076, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5339/2000001 [51:51<333:00:54,  1.66it/s]

buffer size = 11078, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5340/2000001 [51:52<335:21:47,  1.65it/s]

buffer size = 11080, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5341/2000001 [51:53<331:36:38,  1.67it/s]

buffer size = 11082, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5342/2000001 [51:53<332:21:10,  1.67it/s]

buffer size = 11084, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5343/2000001 [51:54<369:25:32,  1.50it/s]

buffer size = 11086, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5344/2000001 [51:55<412:24:05,  1.34it/s]

buffer size = 11088, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5345/2000001 [51:56<433:31:37,  1.28it/s]

buffer size = 11090, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5346/2000001 [51:57<427:30:16,  1.30it/s]

buffer size = 11092, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5347/2000001 [51:57<397:59:13,  1.39it/s]

buffer size = 11094, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5348/2000001 [51:58<376:41:22,  1.47it/s]

buffer size = 11096, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5349/2000001 [51:58<365:00:00,  1.52it/s]

buffer size = 11098, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5350/2000001 [51:59<357:19:49,  1.55it/s]

buffer size = 11100, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5351/2000001 [52:00<347:04:58,  1.60it/s]

buffer size = 11102, epsilon = 0.09733
mean_reward :  0.0


  0%|          | 5352/2000001 [52:00<343:50:11,  1.61it/s]

buffer size = 11104, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5353/2000001 [52:01<338:57:14,  1.63it/s]

buffer size = 11106, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5354/2000001 [52:01<334:25:43,  1.66it/s]

buffer size = 11108, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5355/2000001 [52:02<332:37:01,  1.67it/s]

buffer size = 11110, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5356/2000001 [52:02<332:32:17,  1.67it/s]

buffer size = 11112, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5357/2000001 [52:03<334:00:12,  1.66it/s]

buffer size = 11114, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5358/2000001 [52:04<331:55:32,  1.67it/s]

buffer size = 11116, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5359/2000001 [52:04<333:58:00,  1.66it/s]

buffer size = 11118, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5360/2000001 [52:05<331:24:18,  1.67it/s]

buffer size = 11120, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5361/2000001 [52:05<332:27:48,  1.67it/s]

buffer size = 11122, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5362/2000001 [52:06<332:29:57,  1.67it/s]

buffer size = 11124, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5363/2000001 [52:07<362:09:08,  1.53it/s]

buffer size = 11126, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5364/2000001 [52:08<394:09:15,  1.41it/s]

buffer size = 11128, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5365/2000001 [52:09<419:44:11,  1.32it/s]

buffer size = 11130, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5366/2000001 [52:09<436:33:33,  1.27it/s]

buffer size = 11132, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5367/2000001 [52:10<404:28:14,  1.37it/s]

buffer size = 11134, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5368/2000001 [52:11<381:57:28,  1.45it/s]

buffer size = 11136, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5369/2000001 [52:11<365:55:37,  1.51it/s]

buffer size = 11138, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5370/2000001 [52:12<360:32:15,  1.54it/s]

buffer size = 11140, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5371/2000001 [52:12<348:09:31,  1.59it/s]

buffer size = 11142, epsilon = 0.09732
mean_reward :  0.0


  0%|          | 5372/2000001 [52:13<345:14:02,  1.60it/s]

buffer size = 11144, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5373/2000001 [52:14<344:02:07,  1.61it/s]

buffer size = 11146, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5374/2000001 [52:14<335:29:10,  1.65it/s]

buffer size = 11148, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5375/2000001 [52:15<332:53:10,  1.66it/s]

buffer size = 11150, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5376/2000001 [52:15<330:36:26,  1.68it/s]

buffer size = 11152, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5377/2000001 [52:16<332:18:10,  1.67it/s]

buffer size = 11154, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5378/2000001 [52:17<327:18:11,  1.69it/s]

buffer size = 11156, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5379/2000001 [52:17<328:18:11,  1.69it/s]

buffer size = 11158, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5380/2000001 [52:18<328:25:18,  1.69it/s]

buffer size = 11160, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5381/2000001 [52:18<326:48:32,  1.70it/s]

buffer size = 11162, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5382/2000001 [52:19<327:55:56,  1.69it/s]

buffer size = 11164, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5383/2000001 [52:20<357:24:20,  1.55it/s]

buffer size = 11166, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5384/2000001 [52:21<384:03:18,  1.44it/s]

buffer size = 11168, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5385/2000001 [52:21<399:30:56,  1.39it/s]

buffer size = 11170, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5386/2000001 [52:22<421:34:04,  1.31it/s]

buffer size = 11172, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5387/2000001 [52:23<412:39:24,  1.34it/s]

buffer size = 11174, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5388/2000001 [52:23<390:17:05,  1.42it/s]

buffer size = 11176, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5389/2000001 [52:24<375:46:37,  1.47it/s]

buffer size = 11178, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5390/2000001 [52:25<363:26:03,  1.52it/s]

buffer size = 11180, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5391/2000001 [52:25<358:47:29,  1.54it/s]

buffer size = 11182, epsilon = 0.09731
mean_reward :  0.0


  0%|          | 5392/2000001 [52:26<349:24:22,  1.59it/s]

buffer size = 11184, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5393/2000001 [52:27<344:14:46,  1.61it/s]

buffer size = 11186, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5394/2000001 [52:27<336:10:43,  1.65it/s]

buffer size = 11188, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5395/2000001 [52:28<334:53:49,  1.65it/s]

buffer size = 11190, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5396/2000001 [52:28<332:25:58,  1.67it/s]

buffer size = 11192, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5397/2000001 [52:29<333:50:02,  1.66it/s]

buffer size = 11194, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5398/2000001 [52:29<333:58:25,  1.66it/s]

buffer size = 11196, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5399/2000001 [52:30<332:07:19,  1.67it/s]

buffer size = 11198, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5400/2000001 [52:31<332:07:03,  1.67it/s]

buffer size = 11200, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5401/2000001 [52:31<329:01:33,  1.68it/s]

buffer size = 11202, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5402/2000001 [52:32<331:38:08,  1.67it/s]

buffer size = 11204, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5403/2000001 [52:32<330:32:36,  1.68it/s]

buffer size = 11206, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5404/2000001 [52:33<362:00:30,  1.53it/s]

buffer size = 11208, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5405/2000001 [52:34<391:07:09,  1.42it/s]

buffer size = 11210, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5406/2000001 [52:35<419:52:01,  1.32it/s]

buffer size = 11212, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5407/2000001 [52:36<434:49:51,  1.27it/s]

buffer size = 11214, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5408/2000001 [52:36<408:20:46,  1.36it/s]

buffer size = 11216, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5409/2000001 [52:37<385:35:23,  1.44it/s]

buffer size = 11218, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5410/2000001 [52:38<369:10:21,  1.50it/s]

buffer size = 11220, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5411/2000001 [52:38<357:51:25,  1.55it/s]

buffer size = 11222, epsilon = 0.09730
mean_reward :  0.0


  0%|          | 5412/2000001 [52:39<348:59:05,  1.59it/s]

buffer size = 11224, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5413/2000001 [52:39<343:57:53,  1.61it/s]

buffer size = 11226, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5414/2000001 [52:40<339:49:25,  1.63it/s]

buffer size = 11228, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5415/2000001 [52:41<336:19:44,  1.65it/s]

buffer size = 11230, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5416/2000001 [52:41<335:21:33,  1.65it/s]

buffer size = 11232, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5417/2000001 [52:42<332:05:17,  1.67it/s]

buffer size = 11234, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5418/2000001 [52:42<333:17:56,  1.66it/s]

buffer size = 11236, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5419/2000001 [52:43<329:57:43,  1.68it/s]

buffer size = 11238, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5420/2000001 [52:44<334:22:34,  1.66it/s]

buffer size = 11240, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5421/2000001 [52:44<334:41:49,  1.66it/s]

buffer size = 11242, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5422/2000001 [52:45<333:45:53,  1.66it/s]

buffer size = 11244, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5423/2000001 [52:45<336:37:14,  1.65it/s]

buffer size = 11246, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5424/2000001 [52:46<342:06:17,  1.62it/s]

buffer size = 11248, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5425/2000001 [52:47<375:06:10,  1.48it/s]

buffer size = 11250, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5426/2000001 [52:48<392:32:45,  1.41it/s]

buffer size = 11252, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5427/2000001 [52:48<406:28:29,  1.36it/s]

buffer size = 11254, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5428/2000001 [52:49<440:00:04,  1.26it/s]

buffer size = 11256, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5429/2000001 [52:50<422:24:16,  1.31it/s]

buffer size = 11258, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5430/2000001 [52:51<394:46:29,  1.40it/s]

buffer size = 11260, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5431/2000001 [52:51<376:08:15,  1.47it/s]

buffer size = 11262, epsilon = 0.09729
mean_reward :  0.0


  0%|          | 5432/2000001 [52:52<364:38:39,  1.52it/s]

buffer size = 11264, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5433/2000001 [52:52<352:23:05,  1.57it/s]

buffer size = 11266, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5434/2000001 [52:53<348:56:03,  1.59it/s]

buffer size = 11268, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5435/2000001 [52:54<345:46:35,  1.60it/s]

buffer size = 11270, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5436/2000001 [52:54<351:27:33,  1.58it/s]

buffer size = 11272, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5437/2000001 [52:55<349:54:34,  1.58it/s]

buffer size = 11274, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5438/2000001 [52:56<343:02:41,  1.62it/s]

buffer size = 11276, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5439/2000001 [52:56<341:38:52,  1.62it/s]

buffer size = 11278, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5440/2000001 [52:57<336:29:07,  1.65it/s]

buffer size = 11280, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5441/2000001 [52:57<336:14:30,  1.65it/s]

buffer size = 11282, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5442/2000001 [52:58<334:21:54,  1.66it/s]

buffer size = 11284, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5443/2000001 [52:59<331:50:23,  1.67it/s]

buffer size = 11286, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5444/2000001 [52:59<332:22:54,  1.67it/s]

buffer size = 11288, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5445/2000001 [53:00<331:01:19,  1.67it/s]

buffer size = 11290, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5446/2000001 [53:01<364:09:47,  1.52it/s]

buffer size = 11292, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5447/2000001 [53:01<383:32:32,  1.44it/s]

buffer size = 11294, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5448/2000001 [53:02<396:25:52,  1.40it/s]

buffer size = 11296, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5449/2000001 [53:03<417:44:28,  1.33it/s]

buffer size = 11298, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5450/2000001 [53:04<412:53:02,  1.34it/s]

buffer size = 11300, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5451/2000001 [53:04<391:40:04,  1.41it/s]

buffer size = 11302, epsilon = 0.09728
mean_reward :  0.0


  0%|          | 5452/2000001 [53:05<372:31:53,  1.49it/s]

buffer size = 11304, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5453/2000001 [53:05<361:47:44,  1.53it/s]

buffer size = 11306, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5454/2000001 [53:06<358:32:01,  1.55it/s]

buffer size = 11308, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5455/2000001 [53:07<351:57:54,  1.57it/s]

buffer size = 11310, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5456/2000001 [53:07<342:52:06,  1.62it/s]

buffer size = 11312, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5457/2000001 [53:08<340:34:44,  1.63it/s]

buffer size = 11314, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5458/2000001 [53:09<340:42:13,  1.63it/s]

buffer size = 11316, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5459/2000001 [53:09<336:38:18,  1.65it/s]

buffer size = 11318, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5460/2000001 [53:10<337:09:12,  1.64it/s]

buffer size = 11320, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5461/2000001 [53:10<332:30:16,  1.67it/s]

buffer size = 11322, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5462/2000001 [53:11<330:22:08,  1.68it/s]

buffer size = 11324, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5463/2000001 [53:11<331:23:24,  1.67it/s]

buffer size = 11326, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5464/2000001 [53:12<334:05:07,  1.66it/s]

buffer size = 11328, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5465/2000001 [53:13<335:31:11,  1.65it/s]

buffer size = 11330, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5466/2000001 [53:13<333:05:13,  1.66it/s]

buffer size = 11332, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5467/2000001 [53:14<362:43:50,  1.53it/s]

buffer size = 11334, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5468/2000001 [53:15<384:34:23,  1.44it/s]

buffer size = 11336, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5469/2000001 [53:16<420:55:50,  1.32it/s]

buffer size = 11338, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5470/2000001 [53:17<439:32:23,  1.26it/s]

buffer size = 11340, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5471/2000001 [53:17<411:02:31,  1.35it/s]

buffer size = 11342, epsilon = 0.09727
mean_reward :  0.0


  0%|          | 5472/2000001 [53:18<389:26:40,  1.42it/s]

buffer size = 11344, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5473/2000001 [53:18<368:14:19,  1.50it/s]

buffer size = 11346, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5474/2000001 [53:19<358:19:17,  1.55it/s]

buffer size = 11348, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5475/2000001 [53:20<352:58:49,  1.57it/s]

buffer size = 11350, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5476/2000001 [53:20<346:50:59,  1.60it/s]

buffer size = 11352, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5477/2000001 [53:21<338:39:52,  1.64it/s]

buffer size = 11354, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5478/2000001 [53:21<338:37:52,  1.64it/s]

buffer size = 11356, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5479/2000001 [53:22<335:51:50,  1.65it/s]

buffer size = 11358, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5480/2000001 [53:23<335:37:09,  1.65it/s]

buffer size = 11360, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5481/2000001 [53:23<336:13:45,  1.65it/s]

buffer size = 11362, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5482/2000001 [53:24<332:37:46,  1.67it/s]

buffer size = 11364, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5483/2000001 [53:25<339:48:17,  1.63it/s]

buffer size = 11366, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5484/2000001 [53:25<335:34:14,  1.65it/s]

buffer size = 11368, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5485/2000001 [53:26<336:58:32,  1.64it/s]

buffer size = 11370, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5486/2000001 [53:26<339:09:41,  1.63it/s]

buffer size = 11372, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5487/2000001 [53:27<360:59:09,  1.53it/s]

buffer size = 11374, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5488/2000001 [53:28<382:20:38,  1.45it/s]

buffer size = 11376, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5489/2000001 [53:29<395:59:17,  1.40it/s]

buffer size = 11378, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5490/2000001 [53:29<416:23:18,  1.33it/s]

buffer size = 11380, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5491/2000001 [53:30<427:39:24,  1.30it/s]

buffer size = 11382, epsilon = 0.09726
mean_reward :  0.0


  0%|          | 5492/2000001 [53:31<400:14:22,  1.38it/s]

buffer size = 11384, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5493/2000001 [53:31<377:45:19,  1.47it/s]

buffer size = 11386, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5494/2000001 [53:32<367:40:48,  1.51it/s]

buffer size = 11388, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5495/2000001 [53:33<355:42:29,  1.56it/s]

buffer size = 11390, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5496/2000001 [53:33<348:50:51,  1.59it/s]

buffer size = 11392, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5497/2000001 [53:34<345:20:01,  1.60it/s]

buffer size = 11394, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5498/2000001 [53:35<343:10:08,  1.61it/s]

buffer size = 11396, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5499/2000001 [53:35<339:31:14,  1.63it/s]

buffer size = 11398, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5500/2000001 [53:36<335:37:56,  1.65it/s]

buffer size = 11400, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5501/2000001 [53:36<338:09:43,  1.64it/s]

buffer size = 11402, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5502/2000001 [53:37<337:17:44,  1.64it/s]

buffer size = 11404, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5503/2000001 [53:38<334:54:06,  1.65it/s]

buffer size = 11406, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5504/2000001 [53:38<335:14:49,  1.65it/s]

buffer size = 11408, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5505/2000001 [53:39<335:37:25,  1.65it/s]

buffer size = 11410, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5506/2000001 [53:39<337:28:26,  1.64it/s]

buffer size = 11412, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5507/2000001 [53:40<335:59:31,  1.65it/s]

buffer size = 11414, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5508/2000001 [53:41<366:22:35,  1.51it/s]

buffer size = 11416, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5509/2000001 [53:42<400:45:28,  1.38it/s]

buffer size = 11418, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5510/2000001 [53:43<430:27:30,  1.29it/s]

buffer size = 11420, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5511/2000001 [53:43<428:06:24,  1.29it/s]

buffer size = 11422, epsilon = 0.09725
mean_reward :  0.0


  0%|          | 5512/2000001 [53:44<398:03:58,  1.39it/s]

buffer size = 11424, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5513/2000001 [53:44<379:58:14,  1.46it/s]

buffer size = 11426, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5514/2000001 [53:45<366:26:05,  1.51it/s]

buffer size = 11428, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5515/2000001 [53:46<354:33:10,  1.56it/s]

buffer size = 11430, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5516/2000001 [53:46<347:33:51,  1.59it/s]

buffer size = 11432, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5517/2000001 [53:47<342:51:38,  1.62it/s]

buffer size = 11434, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5518/2000001 [53:47<339:17:37,  1.63it/s]

buffer size = 11436, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5519/2000001 [53:48<338:29:42,  1.64it/s]

buffer size = 11438, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5520/2000001 [53:49<337:13:34,  1.64it/s]

buffer size = 11440, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5521/2000001 [53:49<336:52:04,  1.64it/s]

buffer size = 11442, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5522/2000001 [53:50<335:30:52,  1.65it/s]

buffer size = 11444, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5523/2000001 [53:51<336:01:46,  1.65it/s]

buffer size = 11446, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5524/2000001 [53:51<335:40:58,  1.65it/s]

buffer size = 11448, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5525/2000001 [53:52<334:34:52,  1.66it/s]

buffer size = 11450, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5526/2000001 [53:52<331:33:39,  1.67it/s]

buffer size = 11452, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5527/2000001 [53:53<331:05:56,  1.67it/s]

buffer size = 11454, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5528/2000001 [53:54<366:28:45,  1.51it/s]

buffer size = 11456, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5529/2000001 [53:54<386:29:16,  1.43it/s]

buffer size = 11458, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5530/2000001 [53:55<402:22:26,  1.38it/s]

buffer size = 11460, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5531/2000001 [53:56<422:01:14,  1.31it/s]

buffer size = 11462, epsilon = 0.09724
mean_reward :  0.0


  0%|          | 5532/2000001 [53:57<421:34:23,  1.31it/s]

buffer size = 11464, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5533/2000001 [53:57<396:43:46,  1.40it/s]

buffer size = 11466, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5534/2000001 [53:58<376:31:19,  1.47it/s]

buffer size = 11468, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5535/2000001 [53:59<365:36:09,  1.52it/s]

buffer size = 11470, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5536/2000001 [53:59<357:41:05,  1.55it/s]

buffer size = 11472, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5537/2000001 [54:00<348:14:23,  1.59it/s]

buffer size = 11474, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5538/2000001 [54:00<343:27:59,  1.61it/s]

buffer size = 11476, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5539/2000001 [54:01<341:27:14,  1.62it/s]

buffer size = 11478, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5540/2000001 [54:02<338:21:06,  1.64it/s]

buffer size = 11480, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5541/2000001 [54:02<337:16:26,  1.64it/s]

buffer size = 11482, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5542/2000001 [54:03<335:08:44,  1.65it/s]

buffer size = 11484, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5543/2000001 [54:04<336:21:21,  1.65it/s]

buffer size = 11486, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5544/2000001 [54:04<332:54:43,  1.66it/s]

buffer size = 11488, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5545/2000001 [54:05<338:01:54,  1.64it/s]

buffer size = 11490, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5546/2000001 [54:05<336:49:31,  1.64it/s]

buffer size = 11492, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5547/2000001 [54:06<335:42:10,  1.65it/s]

buffer size = 11494, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5548/2000001 [54:07<338:00:02,  1.64it/s]

buffer size = 11496, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5549/2000001 [54:07<365:07:25,  1.52it/s]

buffer size = 11498, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5550/2000001 [54:08<406:21:07,  1.36it/s]

buffer size = 11500, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5551/2000001 [54:09<428:42:02,  1.29it/s]

buffer size = 11502, epsilon = 0.09723
mean_reward :  0.0


  0%|          | 5552/2000001 [54:10<433:22:21,  1.28it/s]

buffer size = 11504, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5553/2000001 [54:11<402:33:54,  1.38it/s]

buffer size = 11506, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5554/2000001 [54:11<384:07:19,  1.44it/s]

buffer size = 11508, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5555/2000001 [54:12<368:29:35,  1.50it/s]

buffer size = 11510, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5556/2000001 [54:12<361:24:38,  1.53it/s]

buffer size = 11512, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5557/2000001 [54:13<354:28:45,  1.56it/s]

buffer size = 11514, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5558/2000001 [54:14<346:42:03,  1.60it/s]

buffer size = 11516, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5559/2000001 [54:14<339:53:48,  1.63it/s]

buffer size = 11518, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5560/2000001 [54:15<335:00:10,  1.65it/s]

buffer size = 11520, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5561/2000001 [54:15<334:09:25,  1.66it/s]

buffer size = 11522, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5562/2000001 [54:16<334:29:54,  1.66it/s]

buffer size = 11524, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5563/2000001 [54:16<330:46:48,  1.67it/s]

buffer size = 11526, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5564/2000001 [54:17<334:05:35,  1.66it/s]

buffer size = 11528, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5565/2000001 [54:18<330:52:11,  1.67it/s]

buffer size = 11530, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5566/2000001 [54:18<333:05:19,  1.66it/s]

buffer size = 11532, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5567/2000001 [54:19<332:26:37,  1.67it/s]

buffer size = 11534, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5568/2000001 [54:20<335:40:45,  1.65it/s]

buffer size = 11536, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5569/2000001 [54:20<365:38:56,  1.52it/s]

buffer size = 11538, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5570/2000001 [54:21<385:01:11,  1.44it/s]

buffer size = 11540, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5571/2000001 [54:22<398:03:27,  1.39it/s]

buffer size = 11542, epsilon = 0.09722
mean_reward :  0.0


  0%|          | 5572/2000001 [54:23<428:37:06,  1.29it/s]

buffer size = 11544, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5573/2000001 [54:23<413:53:42,  1.34it/s]

buffer size = 11546, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5574/2000001 [54:24<391:10:04,  1.42it/s]

buffer size = 11548, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5575/2000001 [54:25<378:28:10,  1.46it/s]

buffer size = 11550, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5576/2000001 [54:25<365:41:40,  1.51it/s]

buffer size = 11552, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5577/2000001 [54:26<353:27:29,  1.57it/s]

buffer size = 11554, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5578/2000001 [54:26<345:30:49,  1.60it/s]

buffer size = 11556, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5579/2000001 [54:27<341:48:25,  1.62it/s]

buffer size = 11558, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5580/2000001 [54:28<337:41:27,  1.64it/s]

buffer size = 11560, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5581/2000001 [54:28<337:51:18,  1.64it/s]

buffer size = 11562, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5582/2000001 [54:29<337:43:23,  1.64it/s]

buffer size = 11564, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5583/2000001 [54:29<334:09:40,  1.66it/s]

buffer size = 11566, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5584/2000001 [54:30<334:08:01,  1.66it/s]

buffer size = 11568, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5585/2000001 [54:31<336:40:59,  1.65it/s]

buffer size = 11570, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5586/2000001 [54:31<333:43:52,  1.66it/s]

buffer size = 11572, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5587/2000001 [54:32<337:10:49,  1.64it/s]

buffer size = 11574, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5588/2000001 [54:33<335:51:59,  1.65it/s]

buffer size = 11576, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5589/2000001 [54:33<340:45:18,  1.63it/s]

buffer size = 11578, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5590/2000001 [54:34<372:19:57,  1.49it/s]

buffer size = 11580, epsilon = 0.09721
mean_reward :  0.0


  0%|          | 5591/2000001 [54:35<391:46:50,  1.41it/s]

buffer size = 11582, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5592/2000001 [54:36<414:57:12,  1.34it/s]

buffer size = 11584, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5593/2000001 [54:36<429:54:57,  1.29it/s]

buffer size = 11586, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5594/2000001 [54:37<417:40:21,  1.33it/s]

buffer size = 11588, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5595/2000001 [54:38<387:52:44,  1.43it/s]

buffer size = 11590, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5596/2000001 [54:38<371:36:55,  1.49it/s]

buffer size = 11592, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5597/2000001 [54:39<359:06:24,  1.54it/s]

buffer size = 11594, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5598/2000001 [54:40<354:44:44,  1.56it/s]

buffer size = 11596, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5599/2000001 [54:40<346:06:41,  1.60it/s]

buffer size = 11598, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5600/2000001 [54:41<344:29:23,  1.61it/s]

buffer size = 11600, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5601/2000001 [54:41<339:37:49,  1.63it/s]

buffer size = 11602, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5602/2000001 [54:42<337:15:12,  1.64it/s]

buffer size = 11604, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5603/2000001 [54:43<336:53:33,  1.64it/s]

buffer size = 11606, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5604/2000001 [54:43<336:24:13,  1.65it/s]

buffer size = 11608, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5605/2000001 [54:44<335:24:49,  1.65it/s]

buffer size = 11610, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5606/2000001 [54:44<330:34:53,  1.68it/s]

buffer size = 11612, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5607/2000001 [54:45<331:44:07,  1.67it/s]

buffer size = 11614, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5608/2000001 [54:46<331:28:48,  1.67it/s]

buffer size = 11616, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5609/2000001 [54:46<328:44:24,  1.69it/s]

buffer size = 11618, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5610/2000001 [54:47<330:17:14,  1.68it/s]

buffer size = 11620, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5611/2000001 [54:48<367:12:32,  1.51it/s]

buffer size = 11622, epsilon = 0.09720
mean_reward :  0.0


  0%|          | 5612/2000001 [54:48<388:47:22,  1.42it/s]

buffer size = 11624, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5613/2000001 [54:49<412:24:53,  1.34it/s]

buffer size = 11626, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5614/2000001 [54:50<434:46:45,  1.27it/s]

buffer size = 11628, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5615/2000001 [54:51<412:57:38,  1.34it/s]

buffer size = 11630, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5616/2000001 [54:51<392:00:40,  1.41it/s]

buffer size = 11632, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5617/2000001 [54:52<377:04:09,  1.47it/s]

buffer size = 11634, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5618/2000001 [54:53<362:56:35,  1.53it/s]

buffer size = 11636, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5619/2000001 [54:53<358:14:04,  1.55it/s]

buffer size = 11638, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5620/2000001 [54:54<349:02:04,  1.59it/s]

buffer size = 11640, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5621/2000001 [54:54<346:04:52,  1.60it/s]

buffer size = 11642, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5622/2000001 [54:55<351:46:27,  1.57it/s]

buffer size = 11644, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5623/2000001 [54:56<345:50:11,  1.60it/s]

buffer size = 11646, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5624/2000001 [54:56<344:04:55,  1.61it/s]

buffer size = 11648, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5625/2000001 [54:57<341:07:04,  1.62it/s]

buffer size = 11650, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5626/2000001 [54:57<340:06:44,  1.63it/s]

buffer size = 11652, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5627/2000001 [54:58<340:32:04,  1.63it/s]

buffer size = 11654, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5628/2000001 [54:59<338:08:28,  1.64it/s]

buffer size = 11656, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5629/2000001 [54:59<339:00:12,  1.63it/s]

buffer size = 11658, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5630/2000001 [55:00<337:28:05,  1.64it/s]

buffer size = 11660, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5631/2000001 [55:01<359:51:01,  1.54it/s]

buffer size = 11662, epsilon = 0.09719
mean_reward :  0.0


  0%|          | 5632/2000001 [55:01<388:19:40,  1.43it/s]

buffer size = 11664, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5633/2000001 [55:02<395:56:10,  1.40it/s]

buffer size = 11666, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5634/2000001 [55:03<415:30:43,  1.33it/s]

buffer size = 11668, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5635/2000001 [55:04<428:56:11,  1.29it/s]

buffer size = 11670, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5636/2000001 [55:04<400:47:31,  1.38it/s]

buffer size = 11672, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5637/2000001 [55:05<384:30:13,  1.44it/s]

buffer size = 11674, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5638/2000001 [55:06<367:21:30,  1.51it/s]

buffer size = 11676, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5639/2000001 [55:06<357:44:38,  1.55it/s]

buffer size = 11678, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5640/2000001 [55:07<354:56:13,  1.56it/s]

buffer size = 11680, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5641/2000001 [55:08<350:57:28,  1.58it/s]

buffer size = 11682, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5642/2000001 [55:08<347:31:24,  1.59it/s]

buffer size = 11684, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5643/2000001 [55:09<347:15:37,  1.60it/s]

buffer size = 11686, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5644/2000001 [55:09<346:05:59,  1.60it/s]

buffer size = 11688, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5645/2000001 [55:10<343:08:48,  1.61it/s]

buffer size = 11690, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5646/2000001 [55:11<341:52:48,  1.62it/s]

buffer size = 11692, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5647/2000001 [55:11<341:51:42,  1.62it/s]

buffer size = 11694, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5648/2000001 [55:12<340:16:10,  1.63it/s]

buffer size = 11696, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5649/2000001 [55:12<339:05:41,  1.63it/s]

buffer size = 11698, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5650/2000001 [55:13<341:28:14,  1.62it/s]

buffer size = 11700, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5651/2000001 [55:14<338:35:29,  1.64it/s]

buffer size = 11702, epsilon = 0.09718
mean_reward :  0.0


  0%|          | 5652/2000001 [55:14<371:59:07,  1.49it/s]

buffer size = 11704, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5653/2000001 [55:15<402:18:59,  1.38it/s]

buffer size = 11706, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5654/2000001 [55:16<433:44:33,  1.28it/s]

buffer size = 11708, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5655/2000001 [55:17<432:37:31,  1.28it/s]

buffer size = 11710, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5656/2000001 [55:18<405:39:26,  1.37it/s]

buffer size = 11712, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5657/2000001 [55:18<386:12:08,  1.43it/s]

buffer size = 11714, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5658/2000001 [55:19<370:46:58,  1.49it/s]

buffer size = 11716, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5659/2000001 [55:19<363:35:11,  1.52it/s]

buffer size = 11718, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5660/2000001 [55:20<356:30:13,  1.55it/s]

buffer size = 11720, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5661/2000001 [55:21<352:18:00,  1.57it/s]

buffer size = 11722, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5662/2000001 [55:21<348:52:02,  1.59it/s]

buffer size = 11724, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5663/2000001 [55:22<346:06:18,  1.60it/s]

buffer size = 11726, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5664/2000001 [55:23<345:24:10,  1.60it/s]

buffer size = 11728, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5665/2000001 [55:23<341:35:45,  1.62it/s]

buffer size = 11730, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5666/2000001 [55:24<343:02:36,  1.61it/s]

buffer size = 11732, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5667/2000001 [55:24<341:07:27,  1.62it/s]

buffer size = 11734, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5668/2000001 [55:25<342:08:56,  1.62it/s]

buffer size = 11736, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5669/2000001 [55:26<341:52:23,  1.62it/s]

buffer size = 11738, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5670/2000001 [55:26<337:20:31,  1.64it/s]

buffer size = 11740, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5671/2000001 [55:27<336:21:26,  1.65it/s]

buffer size = 11742, epsilon = 0.09717
mean_reward :  0.0


  0%|          | 5672/2000001 [55:28<371:34:10,  1.49it/s]

buffer size = 11744, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5673/2000001 [55:29<406:45:33,  1.36it/s]

buffer size = 11746, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5674/2000001 [55:29<436:16:49,  1.27it/s]

buffer size = 11748, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5675/2000001 [55:30<438:28:29,  1.26it/s]

buffer size = 11750, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5676/2000001 [55:31<408:05:27,  1.36it/s]

buffer size = 11752, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5677/2000001 [55:31<386:08:49,  1.43it/s]

buffer size = 11754, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5678/2000001 [55:32<373:01:26,  1.49it/s]

buffer size = 11756, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5679/2000001 [55:33<360:32:44,  1.54it/s]

buffer size = 11758, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5680/2000001 [55:33<355:34:10,  1.56it/s]

buffer size = 11760, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5681/2000001 [55:34<349:51:40,  1.58it/s]

buffer size = 11762, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5682/2000001 [55:35<347:53:37,  1.59it/s]

buffer size = 11764, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5683/2000001 [55:35<341:50:40,  1.62it/s]

buffer size = 11766, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5684/2000001 [55:36<343:22:48,  1.61it/s]

buffer size = 11768, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5685/2000001 [55:36<342:37:56,  1.62it/s]

buffer size = 11770, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5686/2000001 [55:37<338:02:58,  1.64it/s]

buffer size = 11772, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5687/2000001 [55:38<336:15:55,  1.65it/s]

buffer size = 11774, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5688/2000001 [55:38<333:48:32,  1.66it/s]

buffer size = 11776, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5689/2000001 [55:39<331:09:39,  1.67it/s]

buffer size = 11778, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5690/2000001 [55:39<330:37:41,  1.68it/s]

buffer size = 11780, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5691/2000001 [55:40<331:10:20,  1.67it/s]

buffer size = 11782, epsilon = 0.09716
mean_reward :  0.0


  0%|          | 5692/2000001 [55:41<365:47:40,  1.51it/s]

buffer size = 11784, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5693/2000001 [55:41<383:47:08,  1.44it/s]

buffer size = 11786, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5694/2000001 [55:42<398:00:58,  1.39it/s]

buffer size = 11788, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5695/2000001 [55:43<423:25:57,  1.31it/s]

buffer size = 11790, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5696/2000001 [55:44<416:36:49,  1.33it/s]

buffer size = 11792, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5697/2000001 [55:44<390:57:07,  1.42it/s]

buffer size = 11794, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5698/2000001 [55:45<376:13:42,  1.47it/s]

buffer size = 11796, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5699/2000001 [55:46<363:39:30,  1.52it/s]

buffer size = 11798, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5700/2000001 [55:46<354:45:06,  1.56it/s]

buffer size = 11800, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5701/2000001 [55:47<349:19:03,  1.59it/s]

buffer size = 11802, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5702/2000001 [55:47<343:01:59,  1.61it/s]

buffer size = 11804, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5703/2000001 [55:48<342:14:18,  1.62it/s]

buffer size = 11806, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5704/2000001 [55:49<341:09:05,  1.62it/s]

buffer size = 11808, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5705/2000001 [55:49<343:56:03,  1.61it/s]

buffer size = 11810, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5706/2000001 [55:50<345:50:18,  1.60it/s]

buffer size = 11812, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5707/2000001 [55:51<345:36:26,  1.60it/s]

buffer size = 11814, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5708/2000001 [55:51<345:34:56,  1.60it/s]

buffer size = 11816, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5709/2000001 [55:52<343:38:29,  1.61it/s]

buffer size = 11818, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5710/2000001 [55:52<345:15:07,  1.60it/s]

buffer size = 11820, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5711/2000001 [55:53<339:34:11,  1.63it/s]

buffer size = 11822, epsilon = 0.09715
mean_reward :  0.0


  0%|          | 5712/2000001 [55:54<355:27:31,  1.56it/s]

buffer size = 11824, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5713/2000001 [55:55<388:20:31,  1.43it/s]

buffer size = 11826, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5714/2000001 [55:55<411:24:50,  1.35it/s]

buffer size = 11828, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5715/2000001 [55:56<442:49:55,  1.25it/s]

buffer size = 11830, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5716/2000001 [55:57<432:41:26,  1.28it/s]

buffer size = 11832, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5717/2000001 [55:58<404:20:09,  1.37it/s]

buffer size = 11834, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5718/2000001 [55:58<384:19:58,  1.44it/s]

buffer size = 11836, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5719/2000001 [55:59<371:57:22,  1.49it/s]

buffer size = 11838, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5720/2000001 [56:00<362:32:53,  1.53it/s]

buffer size = 11840, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5721/2000001 [56:00<355:13:57,  1.56it/s]

buffer size = 11842, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5722/2000001 [56:01<350:14:49,  1.58it/s]

buffer size = 11844, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5723/2000001 [56:01<346:40:18,  1.60it/s]

buffer size = 11846, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5724/2000001 [56:02<344:22:37,  1.61it/s]

buffer size = 11848, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5725/2000001 [56:03<339:52:24,  1.63it/s]

buffer size = 11850, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5726/2000001 [56:03<340:54:35,  1.62it/s]

buffer size = 11852, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5727/2000001 [56:04<341:29:14,  1.62it/s]

buffer size = 11854, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5728/2000001 [56:04<339:14:23,  1.63it/s]

buffer size = 11856, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5729/2000001 [56:05<337:21:20,  1.64it/s]

buffer size = 11858, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5730/2000001 [56:06<337:38:40,  1.64it/s]

buffer size = 11860, epsilon = 0.09714
mean_reward :  0.0


  0%|          | 5731/2000001 [56:06<338:57:47,  1.63it/s]

buffer size = 11862, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5732/2000001 [56:07<358:10:44,  1.55it/s]

buffer size = 11864, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5733/2000001 [56:08<383:19:33,  1.45it/s]

buffer size = 11866, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5734/2000001 [56:09<397:41:29,  1.39it/s]

buffer size = 11868, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5735/2000001 [56:09<425:59:51,  1.30it/s]

buffer size = 11870, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5736/2000001 [56:10<432:59:04,  1.28it/s]

buffer size = 11872, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5737/2000001 [56:11<403:55:45,  1.37it/s]

buffer size = 11874, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5738/2000001 [56:11<383:44:40,  1.44it/s]

buffer size = 11876, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5739/2000001 [56:12<367:35:18,  1.51it/s]

buffer size = 11878, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5740/2000001 [56:13<359:53:48,  1.54it/s]

buffer size = 11880, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5741/2000001 [56:13<350:28:03,  1.58it/s]

buffer size = 11882, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5742/2000001 [56:14<348:03:22,  1.59it/s]

buffer size = 11884, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5743/2000001 [56:15<345:53:39,  1.60it/s]

buffer size = 11886, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5744/2000001 [56:15<344:11:43,  1.61it/s]

buffer size = 11888, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5745/2000001 [56:16<344:22:26,  1.61it/s]

buffer size = 11890, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5746/2000001 [56:16<338:36:41,  1.64it/s]

buffer size = 11892, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5747/2000001 [56:17<338:32:40,  1.64it/s]

buffer size = 11894, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5748/2000001 [56:18<338:41:34,  1.64it/s]

buffer size = 11896, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5749/2000001 [56:18<337:58:22,  1.64it/s]

buffer size = 11898, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5750/2000001 [56:19<338:39:35,  1.64it/s]

buffer size = 11900, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5751/2000001 [56:19<340:47:26,  1.63it/s]

buffer size = 11902, epsilon = 0.09713
mean_reward :  0.0


  0%|          | 5752/2000001 [56:20<338:23:22,  1.64it/s]

buffer size = 11904, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5753/2000001 [56:21<368:24:35,  1.50it/s]

buffer size = 11906, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5754/2000001 [56:22<384:37:05,  1.44it/s]

buffer size = 11908, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5755/2000001 [56:22<398:43:22,  1.39it/s]

buffer size = 11910, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5756/2000001 [56:23<422:19:51,  1.31it/s]

buffer size = 11912, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5757/2000001 [56:24<424:26:48,  1.31it/s]

buffer size = 11914, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5758/2000001 [56:25<397:34:29,  1.39it/s]

buffer size = 11916, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5759/2000001 [56:25<387:27:36,  1.43it/s]

buffer size = 11918, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5760/2000001 [56:26<370:26:45,  1.50it/s]

buffer size = 11920, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5761/2000001 [56:26<361:35:27,  1.53it/s]

buffer size = 11922, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5762/2000001 [56:27<356:45:54,  1.55it/s]

buffer size = 11924, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5763/2000001 [56:28<350:30:19,  1.58it/s]

buffer size = 11926, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5764/2000001 [56:28<348:59:46,  1.59it/s]

buffer size = 11928, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5765/2000001 [56:29<347:16:47,  1.60it/s]

buffer size = 11930, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5766/2000001 [56:30<344:19:46,  1.61it/s]

buffer size = 11932, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5767/2000001 [56:30<343:48:26,  1.61it/s]

buffer size = 11934, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5768/2000001 [56:31<342:56:52,  1.62it/s]

buffer size = 11936, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5769/2000001 [56:31<341:26:45,  1.62it/s]

buffer size = 11938, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5770/2000001 [56:32<339:37:21,  1.63it/s]

buffer size = 11940, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5771/2000001 [56:33<338:55:13,  1.63it/s]

buffer size = 11942, epsilon = 0.09712
mean_reward :  0.0


  0%|          | 5772/2000001 [56:33<338:02:30,  1.64it/s]

buffer size = 11944, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5773/2000001 [56:34<340:15:58,  1.63it/s]

buffer size = 11946, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5774/2000001 [56:35<376:21:31,  1.47it/s]

buffer size = 11948, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5775/2000001 [56:35<391:36:54,  1.41it/s]

buffer size = 11950, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5776/2000001 [56:36<409:40:21,  1.35it/s]

buffer size = 11952, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5777/2000001 [56:37<426:56:34,  1.30it/s]

buffer size = 11954, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5778/2000001 [56:38<425:08:55,  1.30it/s]

buffer size = 11956, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5779/2000001 [56:38<398:30:16,  1.39it/s]

buffer size = 11958, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5780/2000001 [56:39<387:20:33,  1.43it/s]

buffer size = 11960, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5781/2000001 [56:40<367:59:36,  1.51it/s]

buffer size = 11962, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5782/2000001 [56:40<360:24:30,  1.54it/s]

buffer size = 11964, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5783/2000001 [56:41<355:27:56,  1.56it/s]

buffer size = 11966, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5784/2000001 [56:42<348:34:29,  1.59it/s]

buffer size = 11968, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5785/2000001 [56:42<346:47:55,  1.60it/s]

buffer size = 11970, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5786/2000001 [56:43<343:35:46,  1.61it/s]

buffer size = 11972, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5787/2000001 [56:43<343:08:22,  1.61it/s]

buffer size = 11974, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5788/2000001 [56:44<340:14:04,  1.63it/s]

buffer size = 11976, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5789/2000001 [56:45<340:56:00,  1.62it/s]

buffer size = 11978, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5790/2000001 [56:45<342:21:42,  1.62it/s]

buffer size = 11980, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5791/2000001 [56:46<339:24:25,  1.63it/s]

buffer size = 11982, epsilon = 0.09711
mean_reward :  0.0


  0%|          | 5792/2000001 [56:46<340:02:31,  1.63it/s]

buffer size = 11984, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5793/2000001 [56:47<341:52:12,  1.62it/s]

buffer size = 11986, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5794/2000001 [56:48<340:05:43,  1.63it/s]

buffer size = 11988, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5795/2000001 [56:48<369:52:06,  1.50it/s]

buffer size = 11990, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5796/2000001 [56:49<389:23:48,  1.42it/s]

buffer size = 11992, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5797/2000001 [56:50<399:57:48,  1.38it/s]

buffer size = 11994, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5798/2000001 [56:51<420:56:17,  1.32it/s]

buffer size = 11996, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5799/2000001 [56:52<434:37:01,  1.27it/s]

buffer size = 11998, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5800/2000001 [56:52<405:14:54,  1.37it/s]

buffer size = 12000, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5801/2000001 [56:53<385:25:34,  1.44it/s]

buffer size = 12002, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5802/2000001 [56:54<370:09:34,  1.50it/s]

buffer size = 12004, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5803/2000001 [56:54<365:01:33,  1.52it/s]

buffer size = 12006, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5804/2000001 [56:55<358:45:12,  1.54it/s]

buffer size = 12008, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5805/2000001 [56:55<356:00:25,  1.56it/s]

buffer size = 12010, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5806/2000001 [56:56<355:32:01,  1.56it/s]

buffer size = 12012, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5807/2000001 [56:57<348:50:17,  1.59it/s]

buffer size = 12014, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5808/2000001 [56:57<345:57:54,  1.60it/s]

buffer size = 12016, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5809/2000001 [56:58<345:53:38,  1.60it/s]

buffer size = 12018, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5810/2000001 [56:58<342:39:41,  1.62it/s]

buffer size = 12020, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5811/2000001 [56:59<340:32:00,  1.63it/s]

buffer size = 12022, epsilon = 0.09710
mean_reward :  0.0


  0%|          | 5812/2000001 [57:00<337:44:50,  1.64it/s]

buffer size = 12024, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5813/2000001 [57:00<339:23:30,  1.63it/s]

buffer size = 12026, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5814/2000001 [57:01<339:37:21,  1.63it/s]

buffer size = 12028, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5815/2000001 [57:02<337:16:03,  1.64it/s]

buffer size = 12030, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5816/2000001 [57:02<366:46:03,  1.51it/s]

buffer size = 12032, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5817/2000001 [57:03<401:07:37,  1.38it/s]

buffer size = 12034, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5818/2000001 [57:04<424:07:09,  1.31it/s]

buffer size = 12036, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5819/2000001 [57:05<437:17:03,  1.27it/s]

buffer size = 12038, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5820/2000001 [57:06<413:41:35,  1.34it/s]

buffer size = 12040, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5821/2000001 [57:06<396:41:08,  1.40it/s]

buffer size = 12042, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5822/2000001 [57:07<378:13:46,  1.46it/s]

buffer size = 12044, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5823/2000001 [57:07<368:40:37,  1.50it/s]

buffer size = 12046, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5824/2000001 [57:08<358:40:07,  1.54it/s]

buffer size = 12048, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5825/2000001 [57:09<354:41:01,  1.56it/s]

buffer size = 12050, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5826/2000001 [57:09<350:32:42,  1.58it/s]

buffer size = 12052, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5827/2000001 [57:10<349:01:23,  1.59it/s]

buffer size = 12054, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5828/2000001 [57:10<345:15:04,  1.60it/s]

buffer size = 12056, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5829/2000001 [57:11<341:50:15,  1.62it/s]

buffer size = 12058, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5830/2000001 [57:12<344:38:03,  1.61it/s]

buffer size = 12060, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5831/2000001 [57:12<341:21:31,  1.62it/s]

buffer size = 12062, epsilon = 0.09709
mean_reward :  0.0


  0%|          | 5832/2000001 [57:13<343:32:08,  1.61it/s]

buffer size = 12064, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5833/2000001 [57:14<343:19:23,  1.61it/s]

buffer size = 12066, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5834/2000001 [57:14<343:08:43,  1.61it/s]

buffer size = 12068, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5835/2000001 [57:15<341:15:27,  1.62it/s]

buffer size = 12070, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5836/2000001 [57:16<375:23:59,  1.48it/s]

buffer size = 12072, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5837/2000001 [57:16<392:30:05,  1.41it/s]

buffer size = 12074, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5838/2000001 [57:17<405:59:29,  1.36it/s]

buffer size = 12076, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5839/2000001 [57:18<430:28:51,  1.29it/s]

buffer size = 12078, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5840/2000001 [57:19<430:51:03,  1.29it/s]

buffer size = 12080, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5841/2000001 [57:19<407:06:43,  1.36it/s]

buffer size = 12082, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5842/2000001 [57:20<387:09:22,  1.43it/s]

buffer size = 12084, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5843/2000001 [57:21<367:44:25,  1.51it/s]

buffer size = 12086, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5844/2000001 [57:21<361:10:57,  1.53it/s]

buffer size = 12088, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5845/2000001 [57:22<353:12:15,  1.57it/s]

buffer size = 12090, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5846/2000001 [57:23<349:18:49,  1.59it/s]

buffer size = 12092, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5847/2000001 [57:23<347:30:50,  1.59it/s]

buffer size = 12094, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5848/2000001 [57:24<345:21:41,  1.60it/s]

buffer size = 12096, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5849/2000001 [57:24<348:43:37,  1.59it/s]

buffer size = 12098, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5850/2000001 [57:25<345:18:48,  1.60it/s]

buffer size = 12100, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5851/2000001 [57:26<349:00:37,  1.59it/s]

buffer size = 12102, epsilon = 0.09708
mean_reward :  0.0


  0%|          | 5852/2000001 [57:26<345:16:11,  1.60it/s]

buffer size = 12104, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5853/2000001 [57:27<341:31:12,  1.62it/s]

buffer size = 12106, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5854/2000001 [57:27<342:45:32,  1.62it/s]

buffer size = 12108, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5855/2000001 [57:28<344:46:13,  1.61it/s]

buffer size = 12110, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5856/2000001 [57:29<359:09:51,  1.54it/s]

buffer size = 12112, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5857/2000001 [57:30<383:20:37,  1.44it/s]

buffer size = 12114, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5858/2000001 [57:30<395:18:16,  1.40it/s]

buffer size = 12116, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5859/2000001 [57:31<415:27:44,  1.33it/s]

buffer size = 12118, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5860/2000001 [57:32<439:12:45,  1.26it/s]

buffer size = 12120, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5861/2000001 [57:33<411:54:26,  1.34it/s]

buffer size = 12122, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5862/2000001 [57:33<390:30:38,  1.42it/s]

buffer size = 12124, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5863/2000001 [57:34<376:42:31,  1.47it/s]

buffer size = 12126, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5864/2000001 [57:35<365:19:59,  1.52it/s]

buffer size = 12128, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5865/2000001 [57:35<359:17:13,  1.54it/s]

buffer size = 12130, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5866/2000001 [57:36<353:20:47,  1.57it/s]

buffer size = 12132, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5867/2000001 [57:36<346:17:21,  1.60it/s]

buffer size = 12134, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5868/2000001 [57:37<345:09:54,  1.60it/s]

buffer size = 12136, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5869/2000001 [57:38<343:47:35,  1.61it/s]

buffer size = 12138, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5870/2000001 [57:38<344:24:54,  1.61it/s]

buffer size = 12140, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5871/2000001 [57:39<343:22:44,  1.61it/s]

buffer size = 12142, epsilon = 0.09707
mean_reward :  0.0


  0%|          | 5872/2000001 [57:40<341:58:04,  1.62it/s]

buffer size = 12144, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5873/2000001 [57:40<340:53:09,  1.62it/s]

buffer size = 12146, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5874/2000001 [57:41<343:45:50,  1.61it/s]

buffer size = 12148, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5875/2000001 [57:41<342:45:33,  1.62it/s]

buffer size = 12150, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5876/2000001 [57:42<340:51:41,  1.63it/s]

buffer size = 12152, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5877/2000001 [57:43<374:15:13,  1.48it/s]

buffer size = 12154, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5878/2000001 [57:44<394:47:20,  1.40it/s]

buffer size = 12156, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5879/2000001 [57:44<414:16:18,  1.34it/s]

buffer size = 12158, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5880/2000001 [57:45<438:17:57,  1.26it/s]

buffer size = 12160, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5881/2000001 [57:46<422:46:57,  1.31it/s]

buffer size = 12162, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5882/2000001 [57:47<397:17:39,  1.39it/s]

buffer size = 12164, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5883/2000001 [57:47<380:08:08,  1.46it/s]

buffer size = 12166, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5884/2000001 [57:48<364:15:36,  1.52it/s]

buffer size = 12168, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5885/2000001 [57:48<354:49:42,  1.56it/s]

buffer size = 12170, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5886/2000001 [57:49<354:30:19,  1.56it/s]

buffer size = 12172, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5887/2000001 [57:50<354:41:16,  1.56it/s]

buffer size = 12174, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5888/2000001 [57:50<346:29:29,  1.60it/s]

buffer size = 12176, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5889/2000001 [57:51<347:07:37,  1.60it/s]

buffer size = 12178, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5890/2000001 [57:52<345:42:06,  1.60it/s]

buffer size = 12180, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5891/2000001 [57:52<345:17:04,  1.60it/s]

buffer size = 12182, epsilon = 0.09706
mean_reward :  0.0


  0%|          | 5892/2000001 [57:53<343:01:40,  1.61it/s]

buffer size = 12184, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5893/2000001 [57:53<341:03:06,  1.62it/s]

buffer size = 12186, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5894/2000001 [57:54<342:25:17,  1.62it/s]

buffer size = 12188, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5895/2000001 [57:55<340:52:09,  1.63it/s]

buffer size = 12190, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5896/2000001 [57:55<346:45:51,  1.60it/s]

buffer size = 12192, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5897/2000001 [57:56<369:36:51,  1.50it/s]

buffer size = 12194, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5898/2000001 [57:57<396:43:37,  1.40it/s]

buffer size = 12196, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5899/2000001 [57:58<416:49:53,  1.33it/s]

buffer size = 12198, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5900/2000001 [57:59<437:49:10,  1.27it/s]

buffer size = 12200, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5901/2000001 [57:59<425:29:06,  1.30it/s]

buffer size = 12202, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5902/2000001 [58:00<399:50:44,  1.39it/s]

buffer size = 12204, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5903/2000001 [58:01<382:00:06,  1.45it/s]

buffer size = 12206, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5904/2000001 [58:01<367:04:00,  1.51it/s]

buffer size = 12208, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5905/2000001 [58:02<358:24:21,  1.55it/s]

buffer size = 12210, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5906/2000001 [58:02<355:50:02,  1.56it/s]

buffer size = 12212, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5907/2000001 [58:03<349:16:20,  1.59it/s]

buffer size = 12214, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5908/2000001 [58:04<351:20:53,  1.58it/s]

buffer size = 12216, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5909/2000001 [58:04<346:28:24,  1.60it/s]

buffer size = 12218, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5910/2000001 [58:05<347:04:32,  1.60it/s]

buffer size = 12220, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5911/2000001 [58:05<346:48:25,  1.60it/s]

buffer size = 12222, epsilon = 0.09705
mean_reward :  0.0


  0%|          | 5912/2000001 [58:06<345:19:31,  1.60it/s]

buffer size = 12224, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5913/2000001 [58:07<343:31:42,  1.61it/s]

buffer size = 12226, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5914/2000001 [58:07<342:01:00,  1.62it/s]

buffer size = 12228, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5915/2000001 [58:08<341:43:59,  1.62it/s]

buffer size = 12230, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5916/2000001 [58:09<340:22:57,  1.63it/s]

buffer size = 12232, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5917/2000001 [58:09<372:41:42,  1.49it/s]

buffer size = 12234, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5918/2000001 [58:10<412:34:49,  1.34it/s]

buffer size = 12236, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5919/2000001 [58:11<444:09:10,  1.25it/s]

buffer size = 12238, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5920/2000001 [58:12<440:47:51,  1.26it/s]

buffer size = 12240, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5921/2000001 [58:13<410:16:32,  1.35it/s]

buffer size = 12242, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5922/2000001 [58:13<391:41:56,  1.41it/s]

buffer size = 12244, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5923/2000001 [58:14<372:25:05,  1.49it/s]

buffer size = 12246, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5924/2000001 [58:14<366:57:56,  1.51it/s]

buffer size = 12248, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5925/2000001 [58:15<361:28:52,  1.53it/s]

buffer size = 12250, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5926/2000001 [58:16<353:22:51,  1.57it/s]

buffer size = 12252, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5927/2000001 [58:16<349:03:03,  1.59it/s]

buffer size = 12254, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5928/2000001 [58:17<343:57:33,  1.61it/s]

buffer size = 12256, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5929/2000001 [58:18<345:18:43,  1.60it/s]

buffer size = 12258, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5930/2000001 [58:18<342:08:43,  1.62it/s]

buffer size = 12260, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5931/2000001 [58:19<345:16:38,  1.60it/s]

buffer size = 12262, epsilon = 0.09704
mean_reward :  0.0


  0%|          | 5932/2000001 [58:19<345:57:57,  1.60it/s]

buffer size = 12264, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5933/2000001 [58:20<344:03:40,  1.61it/s]

buffer size = 12266, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5934/2000001 [58:21<343:50:36,  1.61it/s]

buffer size = 12268, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5935/2000001 [58:21<340:28:14,  1.63it/s]

buffer size = 12270, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5936/2000001 [58:22<363:07:10,  1.53it/s]

buffer size = 12272, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5937/2000001 [58:23<385:08:57,  1.44it/s]

buffer size = 12274, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5938/2000001 [58:24<410:30:24,  1.35it/s]

buffer size = 12276, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5939/2000001 [58:25<438:56:55,  1.26it/s]

buffer size = 12278, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5940/2000001 [58:25<434:59:56,  1.27it/s]

buffer size = 12280, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5941/2000001 [58:26<407:22:30,  1.36it/s]

buffer size = 12282, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5942/2000001 [58:27<384:04:08,  1.44it/s]

buffer size = 12284, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5943/2000001 [58:27<370:58:00,  1.49it/s]

buffer size = 12286, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5944/2000001 [58:28<362:45:32,  1.53it/s]

buffer size = 12288, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5945/2000001 [58:28<357:30:00,  1.55it/s]

buffer size = 12290, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5946/2000001 [58:29<353:47:07,  1.57it/s]

buffer size = 12292, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5947/2000001 [58:30<347:37:28,  1.59it/s]

buffer size = 12294, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5948/2000001 [58:30<345:42:06,  1.60it/s]

buffer size = 12296, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5949/2000001 [58:31<344:23:15,  1.61it/s]

buffer size = 12298, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5950/2000001 [58:31<340:08:00,  1.63it/s]

buffer size = 12300, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5951/2000001 [58:32<342:06:07,  1.62it/s]

buffer size = 12302, epsilon = 0.09703
mean_reward :  0.0


  0%|          | 5952/2000001 [58:33<339:57:53,  1.63it/s]

buffer size = 12304, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5953/2000001 [58:33<340:40:11,  1.63it/s]

buffer size = 12306, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5954/2000001 [58:34<339:41:59,  1.63it/s]

buffer size = 12308, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5955/2000001 [58:35<341:43:18,  1.62it/s]

buffer size = 12310, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5956/2000001 [58:35<357:35:33,  1.55it/s]

buffer size = 12312, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5957/2000001 [58:36<382:48:29,  1.45it/s]

buffer size = 12314, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5958/2000001 [58:37<398:57:01,  1.39it/s]

buffer size = 12316, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5959/2000001 [58:38<414:35:52,  1.34it/s]

buffer size = 12318, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5960/2000001 [58:38<433:06:30,  1.28it/s]

buffer size = 12320, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5961/2000001 [58:39<417:01:43,  1.33it/s]

buffer size = 12322, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5962/2000001 [58:40<394:37:40,  1.40it/s]

buffer size = 12324, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5963/2000001 [58:40<379:00:30,  1.46it/s]

buffer size = 12326, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5964/2000001 [58:41<369:51:17,  1.50it/s]

buffer size = 12328, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5965/2000001 [58:42<364:10:32,  1.52it/s]

buffer size = 12330, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5966/2000001 [58:42<356:22:56,  1.55it/s]

buffer size = 12332, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5967/2000001 [58:43<352:51:18,  1.57it/s]

buffer size = 12334, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5968/2000001 [58:44<349:58:22,  1.58it/s]

buffer size = 12336, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5969/2000001 [58:44<347:26:12,  1.59it/s]

buffer size = 12338, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5970/2000001 [58:45<348:48:31,  1.59it/s]

buffer size = 12340, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5971/2000001 [58:45<346:07:20,  1.60it/s]

buffer size = 12342, epsilon = 0.09702
mean_reward :  0.0


  0%|          | 5972/2000001 [58:46<347:09:59,  1.60it/s]

buffer size = 12344, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5973/2000001 [58:47<344:23:43,  1.61it/s]

buffer size = 12346, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5974/2000001 [58:47<345:11:31,  1.60it/s]

buffer size = 12348, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5975/2000001 [58:48<346:36:54,  1.60it/s]

buffer size = 12350, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5976/2000001 [58:49<358:13:50,  1.55it/s]

buffer size = 12352, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5977/2000001 [58:49<385:46:56,  1.44it/s]

buffer size = 12354, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5978/2000001 [58:50<401:04:01,  1.38it/s]

buffer size = 12356, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5979/2000001 [58:51<418:17:05,  1.32it/s]

buffer size = 12358, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5980/2000001 [58:52<438:16:49,  1.26it/s]

buffer size = 12360, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5981/2000001 [58:53<410:19:50,  1.35it/s]

buffer size = 12362, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5982/2000001 [58:53<390:55:38,  1.42it/s]

buffer size = 12364, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5983/2000001 [58:54<376:59:32,  1.47it/s]

buffer size = 12366, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5984/2000001 [58:54<368:59:06,  1.50it/s]

buffer size = 12368, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5985/2000001 [58:55<360:35:03,  1.54it/s]

buffer size = 12370, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5986/2000001 [58:56<360:27:14,  1.54it/s]

buffer size = 12372, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5987/2000001 [58:56<354:25:53,  1.56it/s]

buffer size = 12374, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5988/2000001 [58:57<347:56:35,  1.59it/s]

buffer size = 12376, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5989/2000001 [58:57<346:52:33,  1.60it/s]

buffer size = 12378, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5990/2000001 [58:58<343:29:40,  1.61it/s]

buffer size = 12380, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5991/2000001 [58:59<348:17:57,  1.59it/s]

buffer size = 12382, epsilon = 0.09701
mean_reward :  0.0


  0%|          | 5992/2000001 [58:59<344:30:39,  1.61it/s]

buffer size = 12384, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 5993/2000001 [59:00<344:19:41,  1.61it/s]

buffer size = 12386, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 5994/2000001 [59:01<345:41:41,  1.60it/s]

buffer size = 12388, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 5995/2000001 [59:01<342:02:28,  1.62it/s]

buffer size = 12390, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 5996/2000001 [59:02<342:18:02,  1.62it/s]

buffer size = 12392, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 5997/2000001 [59:03<376:53:58,  1.47it/s]

buffer size = 12394, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 5998/2000001 [59:03<390:24:39,  1.42it/s]

buffer size = 12396, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 5999/2000001 [59:04<401:49:59,  1.38it/s]

buffer size = 12398, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6000/2000001 [59:05<419:13:54,  1.32it/s]

buffer size = 12400, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6001/2000001 [59:06<438:13:58,  1.26it/s]

buffer size = 12402, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6002/2000001 [59:07<412:40:51,  1.34it/s]

buffer size = 12404, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6003/2000001 [59:07<391:07:13,  1.42it/s]

buffer size = 12406, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6004/2000001 [59:08<374:13:17,  1.48it/s]

buffer size = 12408, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6005/2000001 [59:08<365:40:59,  1.51it/s]

buffer size = 12410, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6006/2000001 [59:09<356:31:06,  1.55it/s]

buffer size = 12412, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6007/2000001 [59:10<353:43:43,  1.57it/s]

buffer size = 12414, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6008/2000001 [59:10<351:52:57,  1.57it/s]

buffer size = 12416, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6009/2000001 [59:11<348:33:22,  1.59it/s]

buffer size = 12418, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6010/2000001 [59:11<345:35:17,  1.60it/s]

buffer size = 12420, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6011/2000001 [59:12<343:10:03,  1.61it/s]

buffer size = 12422, epsilon = 0.09700
mean_reward :  0.0


  0%|          | 6012/2000001 [59:13<344:18:42,  1.61it/s]

buffer size = 12424, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6013/2000001 [59:13<345:23:34,  1.60it/s]

buffer size = 12426, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6014/2000001 [59:14<345:18:34,  1.60it/s]

buffer size = 12428, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6015/2000001 [59:15<346:21:51,  1.60it/s]

buffer size = 12430, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6016/2000001 [59:15<342:56:19,  1.62it/s]

buffer size = 12432, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6017/2000001 [59:16<347:35:40,  1.59it/s]

buffer size = 12434, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6018/2000001 [59:17<384:46:58,  1.44it/s]

buffer size = 12436, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6019/2000001 [59:17<405:17:08,  1.37it/s]

buffer size = 12438, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6020/2000001 [59:18<447:42:58,  1.24it/s]

buffer size = 12440, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6021/2000001 [59:19<444:37:59,  1.25it/s]

buffer size = 12442, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6022/2000001 [59:20<415:10:23,  1.33it/s]

buffer size = 12444, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6023/2000001 [59:20<388:58:42,  1.42it/s]

buffer size = 12446, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6024/2000001 [59:21<376:26:14,  1.47it/s]

buffer size = 12448, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6025/2000001 [59:22<367:32:23,  1.51it/s]

buffer size = 12450, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6026/2000001 [59:22<357:46:43,  1.55it/s]

buffer size = 12452, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6027/2000001 [59:23<355:03:54,  1.56it/s]

buffer size = 12454, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6028/2000001 [59:24<351:06:48,  1.58it/s]

buffer size = 12456, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6029/2000001 [59:24<352:55:42,  1.57it/s]

buffer size = 12458, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6030/2000001 [59:25<347:56:50,  1.59it/s]

buffer size = 12460, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6031/2000001 [59:25<346:45:39,  1.60it/s]

buffer size = 12462, epsilon = 0.09699
mean_reward :  0.0


  0%|          | 6032/2000001 [59:26<350:33:17,  1.58it/s]

buffer size = 12464, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6033/2000001 [59:27<361:32:39,  1.53it/s]

buffer size = 12466, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6034/2000001 [59:28<381:25:00,  1.45it/s]

buffer size = 12468, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6035/2000001 [59:28<373:35:48,  1.48it/s]

buffer size = 12470, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6036/2000001 [59:29<365:41:12,  1.51it/s]

buffer size = 12472, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6037/2000001 [59:30<391:28:36,  1.41it/s]

buffer size = 12474, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6038/2000001 [59:30<402:33:56,  1.38it/s]

buffer size = 12476, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6039/2000001 [59:31<409:51:56,  1.35it/s]

buffer size = 12478, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6040/2000001 [59:32<429:06:55,  1.29it/s]

buffer size = 12480, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6041/2000001 [59:33<437:40:00,  1.27it/s]

buffer size = 12482, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6042/2000001 [59:34<410:34:23,  1.35it/s]

buffer size = 12484, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6043/2000001 [59:34<389:30:53,  1.42it/s]

buffer size = 12486, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6044/2000001 [59:35<381:26:53,  1.45it/s]

buffer size = 12488, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6045/2000001 [59:35<370:27:59,  1.50it/s]

buffer size = 12490, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6046/2000001 [59:36<362:54:14,  1.53it/s]

buffer size = 12492, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6047/2000001 [59:37<360:36:39,  1.54it/s]

buffer size = 12494, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6048/2000001 [59:37<354:46:41,  1.56it/s]

buffer size = 12496, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6049/2000001 [59:38<353:32:00,  1.57it/s]

buffer size = 12498, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6050/2000001 [59:39<349:33:20,  1.58it/s]

buffer size = 12500, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6051/2000001 [59:39<351:06:13,  1.58it/s]

buffer size = 12502, epsilon = 0.09698
mean_reward :  0.0


  0%|          | 6052/2000001 [59:40<349:01:40,  1.59it/s]

buffer size = 12504, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6053/2000001 [59:40<346:53:24,  1.60it/s]

buffer size = 12506, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6054/2000001 [59:41<346:36:42,  1.60it/s]

buffer size = 12508, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6055/2000001 [59:42<345:11:57,  1.60it/s]

buffer size = 12510, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6056/2000001 [59:42<348:45:00,  1.59it/s]

buffer size = 12512, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6057/2000001 [59:43<368:21:35,  1.50it/s]

buffer size = 12514, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6058/2000001 [59:44<398:05:10,  1.39it/s]

buffer size = 12516, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6059/2000001 [59:45<416:05:11,  1.33it/s]

buffer size = 12518, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6060/2000001 [59:46<450:19:51,  1.23it/s]

buffer size = 12520, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6061/2000001 [59:46<423:36:20,  1.31it/s]

buffer size = 12522, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6062/2000001 [59:47<400:31:10,  1.38it/s]

buffer size = 12524, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6063/2000001 [59:48<383:39:48,  1.44it/s]

buffer size = 12526, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6064/2000001 [59:48<373:22:58,  1.48it/s]

buffer size = 12528, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6065/2000001 [59:49<362:35:21,  1.53it/s]

buffer size = 12530, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6066/2000001 [59:49<357:01:12,  1.55it/s]

buffer size = 12532, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6067/2000001 [59:50<352:35:40,  1.57it/s]

buffer size = 12534, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6068/2000001 [59:51<349:43:20,  1.58it/s]

buffer size = 12536, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6069/2000001 [59:51<345:33:29,  1.60it/s]

buffer size = 12538, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6070/2000001 [59:52<341:43:22,  1.62it/s]

buffer size = 12540, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6071/2000001 [59:52<341:40:46,  1.62it/s]

buffer size = 12542, epsilon = 0.09697
mean_reward :  0.0


  0%|          | 6072/2000001 [59:53<340:56:55,  1.62it/s]

buffer size = 12544, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6073/2000001 [59:54<338:17:03,  1.64it/s]

buffer size = 12546, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6074/2000001 [59:54<335:51:12,  1.65it/s]

buffer size = 12548, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6075/2000001 [59:55<336:27:27,  1.65it/s]

buffer size = 12550, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6076/2000001 [59:56<338:48:56,  1.63it/s]

buffer size = 12552, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6077/2000001 [59:56<372:12:32,  1.49it/s]

buffer size = 12554, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6078/2000001 [59:57<405:24:59,  1.37it/s]

buffer size = 12556, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6079/2000001 [59:58<422:31:59,  1.31it/s]

buffer size = 12558, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6080/2000001 [59:59<433:12:05,  1.28it/s]

buffer size = 12560, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6081/2000001 [1:00:00<413:11:00,  1.34it/s]

buffer size = 12562, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6082/2000001 [1:00:00<392:00:56,  1.41it/s]

buffer size = 12564, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6083/2000001 [1:00:01<375:08:47,  1.48it/s]

buffer size = 12566, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6084/2000001 [1:00:01<368:19:02,  1.50it/s]

buffer size = 12568, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6085/2000001 [1:00:02<363:44:47,  1.52it/s]

buffer size = 12570, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6086/2000001 [1:00:03<359:44:07,  1.54it/s]

buffer size = 12572, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6087/2000001 [1:00:03<354:41:11,  1.56it/s]

buffer size = 12574, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6088/2000001 [1:00:04<368:13:43,  1.50it/s]

buffer size = 12576, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6089/2000001 [1:00:05<360:22:54,  1.54it/s]

buffer size = 12578, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6090/2000001 [1:00:05<352:41:31,  1.57it/s]

buffer size = 12580, epsilon = 0.09696
mean_reward :  0.0


  0%|          | 6091/2000001 [1:00:06<346:44:07,  1.60it/s]

buffer size = 12582, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6092/2000001 [1:00:06<344:23:27,  1.61it/s]

buffer size = 12584, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6093/2000001 [1:00:07<338:37:41,  1.64it/s]

buffer size = 12586, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6094/2000001 [1:00:08<336:43:22,  1.64it/s]

buffer size = 12588, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6095/2000001 [1:00:08<340:28:33,  1.63it/s]

buffer size = 12590, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6096/2000001 [1:00:09<336:54:49,  1.64it/s]

buffer size = 12592, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6097/2000001 [1:00:10<368:56:53,  1.50it/s]

buffer size = 12594, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6098/2000001 [1:00:10<391:47:10,  1.41it/s]

buffer size = 12596, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6099/2000001 [1:00:11<424:47:55,  1.30it/s]

buffer size = 12598, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6100/2000001 [1:00:12<441:16:42,  1.26it/s]

buffer size = 12600, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6101/2000001 [1:00:13<411:27:42,  1.35it/s]

buffer size = 12602, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6102/2000001 [1:00:13<388:02:11,  1.43it/s]

buffer size = 12604, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6103/2000001 [1:00:14<372:56:16,  1.49it/s]

buffer size = 12606, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6104/2000001 [1:00:15<360:46:49,  1.54it/s]

buffer size = 12608, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6105/2000001 [1:00:15<355:15:20,  1.56it/s]

buffer size = 12610, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6106/2000001 [1:00:16<352:38:36,  1.57it/s]

buffer size = 12612, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6107/2000001 [1:00:17<348:34:12,  1.59it/s]

buffer size = 12614, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6108/2000001 [1:00:17<349:06:43,  1.59it/s]

buffer size = 12616, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6109/2000001 [1:00:18<347:48:17,  1.59it/s]

buffer size = 12618, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6110/2000001 [1:00:18<342:27:45,  1.62it/s]

buffer size = 12620, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6111/2000001 [1:00:19<344:38:30,  1.61it/s]

buffer size = 12622, epsilon = 0.09695
mean_reward :  0.0


  0%|          | 6112/2000001 [1:00:20<345:57:16,  1.60it/s]

buffer size = 12624, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6113/2000001 [1:00:20<343:22:33,  1.61it/s]

buffer size = 12626, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6114/2000001 [1:00:21<339:28:24,  1.63it/s]

buffer size = 12628, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6115/2000001 [1:00:21<336:45:39,  1.64it/s]

buffer size = 12630, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6116/2000001 [1:00:22<338:45:31,  1.63it/s]

buffer size = 12632, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6117/2000001 [1:00:23<368:13:23,  1.50it/s]

buffer size = 12634, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6118/2000001 [1:00:24<397:05:50,  1.39it/s]

buffer size = 12636, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6119/2000001 [1:00:24<411:17:47,  1.35it/s]

buffer size = 12638, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6120/2000001 [1:00:25<452:34:32,  1.22it/s]

buffer size = 12640, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6121/2000001 [1:00:26<428:45:58,  1.29it/s]

buffer size = 12642, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6122/2000001 [1:00:27<404:26:22,  1.37it/s]

buffer size = 12644, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6123/2000001 [1:00:27<387:41:31,  1.43it/s]

buffer size = 12646, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6124/2000001 [1:00:28<370:21:57,  1.50it/s]

buffer size = 12648, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6125/2000001 [1:00:29<361:27:59,  1.53it/s]

buffer size = 12650, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6126/2000001 [1:00:29<353:11:29,  1.57it/s]

buffer size = 12652, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6127/2000001 [1:00:30<344:44:01,  1.61it/s]

buffer size = 12654, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6128/2000001 [1:00:30<343:30:15,  1.61it/s]

buffer size = 12656, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6129/2000001 [1:00:31<339:43:33,  1.63it/s]

buffer size = 12658, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6130/2000001 [1:00:32<342:11:46,  1.62it/s]

buffer size = 12660, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6131/2000001 [1:00:32<341:56:01,  1.62it/s]

buffer size = 12662, epsilon = 0.09694
mean_reward :  0.0


  0%|          | 6132/2000001 [1:00:33<346:07:50,  1.60it/s]

buffer size = 12664, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6133/2000001 [1:00:34<344:25:16,  1.61it/s]

buffer size = 12666, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6134/2000001 [1:00:34<340:03:42,  1.63it/s]

buffer size = 12668, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6135/2000001 [1:00:35<344:34:31,  1.61it/s]

buffer size = 12670, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6136/2000001 [1:00:35<341:06:05,  1.62it/s]

buffer size = 12672, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6137/2000001 [1:00:36<368:26:46,  1.50it/s]

buffer size = 12674, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6138/2000001 [1:00:37<394:29:53,  1.40it/s]

buffer size = 12676, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6139/2000001 [1:00:38<409:45:43,  1.35it/s]

buffer size = 12678, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6140/2000001 [1:00:39<431:31:53,  1.28it/s]

buffer size = 12680, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6141/2000001 [1:00:39<413:58:35,  1.34it/s]

buffer size = 12682, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6142/2000001 [1:00:40<387:12:27,  1.43it/s]

buffer size = 12684, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6143/2000001 [1:00:40<369:24:05,  1.50it/s]

buffer size = 12686, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6144/2000001 [1:00:41<356:58:38,  1.55it/s]

buffer size = 12688, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6145/2000001 [1:00:42<352:42:07,  1.57it/s]

buffer size = 12690, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6146/2000001 [1:00:42<352:28:57,  1.57it/s]

buffer size = 12692, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6147/2000001 [1:00:43<351:05:50,  1.58it/s]

buffer size = 12694, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6148/2000001 [1:00:44<350:19:09,  1.58it/s]

buffer size = 12696, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6149/2000001 [1:00:44<348:46:28,  1.59it/s]

buffer size = 12698, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6150/2000001 [1:00:45<344:45:05,  1.61it/s]

buffer size = 12700, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6151/2000001 [1:00:45<344:19:00,  1.61it/s]

buffer size = 12702, epsilon = 0.09693
mean_reward :  0.0


  0%|          | 6152/2000001 [1:00:46<343:25:41,  1.61it/s]

buffer size = 12704, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6153/2000001 [1:00:47<342:07:12,  1.62it/s]

buffer size = 12706, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6154/2000001 [1:00:47<345:00:26,  1.61it/s]

buffer size = 12708, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6155/2000001 [1:00:48<344:24:23,  1.61it/s]

buffer size = 12710, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6156/2000001 [1:00:49<346:26:16,  1.60it/s]

buffer size = 12712, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6157/2000001 [1:00:49<373:55:45,  1.48it/s]

buffer size = 12714, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6158/2000001 [1:00:50<414:46:35,  1.34it/s]

buffer size = 12716, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6159/2000001 [1:00:51<435:18:11,  1.27it/s]

buffer size = 12718, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6160/2000001 [1:00:52<447:50:20,  1.24it/s]

buffer size = 12720, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6161/2000001 [1:00:53<417:27:30,  1.33it/s]

buffer size = 12722, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6162/2000001 [1:00:53<389:12:07,  1.42it/s]

buffer size = 12724, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6163/2000001 [1:00:54<375:40:03,  1.47it/s]

buffer size = 12726, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6164/2000001 [1:00:54<370:09:13,  1.50it/s]

buffer size = 12728, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6165/2000001 [1:00:55<362:07:57,  1.53it/s]

buffer size = 12730, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6166/2000001 [1:00:56<358:44:48,  1.54it/s]

buffer size = 12732, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6167/2000001 [1:00:56<358:16:27,  1.55it/s]

buffer size = 12734, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6168/2000001 [1:00:57<356:05:46,  1.56it/s]

buffer size = 12736, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6169/2000001 [1:00:58<349:25:09,  1.59it/s]

buffer size = 12738, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6170/2000001 [1:00:58<350:48:31,  1.58it/s]

buffer size = 12740, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6171/2000001 [1:00:59<354:50:41,  1.56it/s]

buffer size = 12742, epsilon = 0.09692
mean_reward :  0.0


  0%|          | 6172/2000001 [1:01:00<347:16:39,  1.59it/s]

buffer size = 12744, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6173/2000001 [1:01:00<350:43:18,  1.58it/s]

buffer size = 12746, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6174/2000001 [1:01:01<348:57:49,  1.59it/s]

buffer size = 12748, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6175/2000001 [1:01:01<347:24:21,  1.59it/s]

buffer size = 12750, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6176/2000001 [1:01:02<352:22:35,  1.57it/s]

buffer size = 12752, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6177/2000001 [1:01:03<397:51:03,  1.39it/s]

buffer size = 12754, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6178/2000001 [1:01:04<407:35:51,  1.36it/s]

buffer size = 12756, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6179/2000001 [1:01:05<425:49:22,  1.30it/s]

buffer size = 12758, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6180/2000001 [1:01:05<444:15:35,  1.25it/s]

buffer size = 12760, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6181/2000001 [1:01:06<416:20:36,  1.33it/s]

buffer size = 12762, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6182/2000001 [1:01:07<398:55:09,  1.39it/s]

buffer size = 12764, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6183/2000001 [1:01:07<383:39:30,  1.44it/s]

buffer size = 12766, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6184/2000001 [1:01:08<374:01:36,  1.48it/s]

buffer size = 12768, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6185/2000001 [1:01:09<366:14:59,  1.51it/s]

buffer size = 12770, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6186/2000001 [1:01:09<366:25:39,  1.51it/s]

buffer size = 12772, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6187/2000001 [1:01:10<360:19:39,  1.54it/s]

buffer size = 12774, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6188/2000001 [1:01:11<358:10:05,  1.55it/s]

buffer size = 12776, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6189/2000001 [1:01:11<355:26:51,  1.56it/s]

buffer size = 12778, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6190/2000001 [1:01:12<354:08:17,  1.56it/s]

buffer size = 12780, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6191/2000001 [1:01:12<352:43:28,  1.57it/s]

buffer size = 12782, epsilon = 0.09691
mean_reward :  0.0


  0%|          | 6192/2000001 [1:01:13<350:28:20,  1.58it/s]

buffer size = 12784, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6193/2000001 [1:01:14<351:13:46,  1.58it/s]

buffer size = 12786, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6194/2000001 [1:01:14<346:03:14,  1.60it/s]

buffer size = 12788, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6195/2000001 [1:01:15<347:10:16,  1.60it/s]

buffer size = 12790, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6196/2000001 [1:01:16<366:56:38,  1.51it/s]

buffer size = 12792, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6197/2000001 [1:01:17<398:23:25,  1.39it/s]

buffer size = 12794, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6198/2000001 [1:01:17<423:21:41,  1.31it/s]

buffer size = 12796, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6199/2000001 [1:01:18<443:39:47,  1.25it/s]

buffer size = 12798, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6200/2000001 [1:01:19<427:26:09,  1.30it/s]

buffer size = 12800, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6201/2000001 [1:01:20<407:45:05,  1.36it/s]

buffer size = 12802, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6202/2000001 [1:01:20<391:21:30,  1.42it/s]

buffer size = 12804, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6203/2000001 [1:01:21<379:31:50,  1.46it/s]

buffer size = 12806, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6204/2000001 [1:01:22<369:11:08,  1.50it/s]

buffer size = 12808, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6205/2000001 [1:01:22<361:40:40,  1.53it/s]

buffer size = 12810, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6206/2000001 [1:01:23<358:29:36,  1.54it/s]

buffer size = 12812, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6207/2000001 [1:01:23<356:53:06,  1.55it/s]

buffer size = 12814, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6208/2000001 [1:01:24<357:46:41,  1.55it/s]

buffer size = 12816, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6209/2000001 [1:01:25<353:57:47,  1.56it/s]

buffer size = 12818, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6210/2000001 [1:01:25<352:12:59,  1.57it/s]

buffer size = 12820, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6211/2000001 [1:01:26<356:37:17,  1.55it/s]

buffer size = 12822, epsilon = 0.09690
mean_reward :  0.0


  0%|          | 6212/2000001 [1:01:27<352:13:57,  1.57it/s]

buffer size = 12824, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6213/2000001 [1:01:27<351:15:28,  1.58it/s]

buffer size = 12826, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6214/2000001 [1:01:28<346:14:39,  1.60it/s]

buffer size = 12828, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6215/2000001 [1:01:29<350:53:08,  1.58it/s]

buffer size = 12830, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6216/2000001 [1:01:29<394:06:27,  1.41it/s]

buffer size = 12832, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6217/2000001 [1:01:30<414:08:57,  1.34it/s]

buffer size = 12834, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6218/2000001 [1:01:31<423:14:05,  1.31it/s]

buffer size = 12836, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6219/2000001 [1:01:32<431:15:37,  1.28it/s]

buffer size = 12838, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6220/2000001 [1:01:33<428:07:52,  1.29it/s]

buffer size = 12840, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6221/2000001 [1:01:33<408:33:39,  1.36it/s]

buffer size = 12842, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6222/2000001 [1:01:34<390:43:44,  1.42it/s]

buffer size = 12844, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6223/2000001 [1:01:35<382:00:16,  1.45it/s]

buffer size = 12846, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6224/2000001 [1:01:35<372:21:52,  1.49it/s]

buffer size = 12848, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6225/2000001 [1:01:36<360:52:41,  1.53it/s]

buffer size = 12850, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6226/2000001 [1:01:36<357:48:49,  1.55it/s]

buffer size = 12852, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6227/2000001 [1:01:37<355:24:16,  1.56it/s]

buffer size = 12854, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6228/2000001 [1:01:38<349:47:52,  1.58it/s]

buffer size = 12856, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6229/2000001 [1:01:38<352:17:12,  1.57it/s]

buffer size = 12858, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6230/2000001 [1:01:39<353:02:10,  1.57it/s]

buffer size = 12860, epsilon = 0.09689
mean_reward :  0.0


  0%|          | 6231/2000001 [1:01:40<352:03:14,  1.57it/s]

buffer size = 12862, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6232/2000001 [1:01:40<354:59:53,  1.56it/s]

buffer size = 12864, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6233/2000001 [1:01:41<350:18:41,  1.58it/s]

buffer size = 12866, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6234/2000001 [1:01:42<353:33:47,  1.57it/s]

buffer size = 12868, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6235/2000001 [1:01:42<370:38:34,  1.49it/s]

buffer size = 12870, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6236/2000001 [1:01:43<408:13:13,  1.36it/s]

buffer size = 12872, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6237/2000001 [1:01:44<436:15:47,  1.27it/s]

buffer size = 12874, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6238/2000001 [1:01:45<454:31:08,  1.22it/s]

buffer size = 12876, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6239/2000001 [1:01:46<423:45:47,  1.31it/s]

buffer size = 12878, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6240/2000001 [1:01:46<398:31:57,  1.39it/s]

buffer size = 12880, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6241/2000001 [1:01:47<381:34:32,  1.45it/s]

buffer size = 12882, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6242/2000001 [1:01:47<373:32:22,  1.48it/s]

buffer size = 12884, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6243/2000001 [1:01:48<366:19:12,  1.51it/s]

buffer size = 12886, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6244/2000001 [1:01:49<361:32:54,  1.53it/s]

buffer size = 12888, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6245/2000001 [1:01:49<356:45:50,  1.55it/s]

buffer size = 12890, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6246/2000001 [1:01:50<353:40:39,  1.57it/s]

buffer size = 12892, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6247/2000001 [1:01:51<352:27:09,  1.57it/s]

buffer size = 12894, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6248/2000001 [1:01:51<352:44:54,  1.57it/s]

buffer size = 12896, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6249/2000001 [1:01:52<353:09:17,  1.57it/s]

buffer size = 12898, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6250/2000001 [1:01:52<348:24:24,  1.59it/s]

buffer size = 12900, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6251/2000001 [1:01:53<349:33:22,  1.58it/s]

buffer size = 12902, epsilon = 0.09688
mean_reward :  0.0


  0%|          | 6252/2000001 [1:01:54<350:25:18,  1.58it/s]

buffer size = 12904, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6253/2000001 [1:01:54<352:06:17,  1.57it/s]

buffer size = 12906, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6254/2000001 [1:01:55<359:33:00,  1.54it/s]

buffer size = 12908, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6255/2000001 [1:01:56<393:11:18,  1.41it/s]

buffer size = 12910, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6256/2000001 [1:01:57<429:26:02,  1.29it/s]

buffer size = 12912, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6257/2000001 [1:01:58<445:42:48,  1.24it/s]

buffer size = 12914, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6258/2000001 [1:01:59<449:24:21,  1.23it/s]

buffer size = 12916, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6259/2000001 [1:01:59<421:53:40,  1.31it/s]

buffer size = 12918, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6260/2000001 [1:02:00<400:42:58,  1.38it/s]

buffer size = 12920, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6261/2000001 [1:02:00<385:13:53,  1.44it/s]

buffer size = 12922, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6262/2000001 [1:02:01<374:27:31,  1.48it/s]

buffer size = 12924, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6263/2000001 [1:02:02<367:18:47,  1.51it/s]

buffer size = 12926, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6264/2000001 [1:02:02<363:44:54,  1.52it/s]

buffer size = 12928, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6265/2000001 [1:02:03<360:27:34,  1.54it/s]

buffer size = 12930, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6266/2000001 [1:02:04<364:45:54,  1.52it/s]

buffer size = 12932, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6267/2000001 [1:02:04<361:22:08,  1.53it/s]

buffer size = 12934, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6268/2000001 [1:02:05<358:04:29,  1.55it/s]

buffer size = 12936, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6269/2000001 [1:02:06<357:39:32,  1.55it/s]

buffer size = 12938, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6270/2000001 [1:02:06<357:39:26,  1.55it/s]

buffer size = 12940, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6271/2000001 [1:02:07<360:14:29,  1.54it/s]

buffer size = 12942, epsilon = 0.09687
mean_reward :  0.0


  0%|          | 6272/2000001 [1:02:08<359:28:39,  1.54it/s]

buffer size = 12944, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6273/2000001 [1:02:08<352:44:22,  1.57it/s]

buffer size = 12946, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6274/2000001 [1:02:09<381:58:07,  1.45it/s]

buffer size = 12948, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6275/2000001 [1:02:10<400:15:52,  1.38it/s]

buffer size = 12950, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6276/2000001 [1:02:11<405:38:10,  1.37it/s]

buffer size = 12952, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6277/2000001 [1:02:11<434:23:37,  1.27it/s]

buffer size = 12954, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6278/2000001 [1:02:12<448:35:23,  1.23it/s]

buffer size = 12956, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6279/2000001 [1:02:13<430:00:47,  1.29it/s]

buffer size = 12958, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6280/2000001 [1:02:14<401:00:02,  1.38it/s]

buffer size = 12960, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6281/2000001 [1:02:14<386:40:13,  1.43it/s]

buffer size = 12962, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6282/2000001 [1:02:15<376:10:59,  1.47it/s]

buffer size = 12964, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6283/2000001 [1:02:16<369:24:36,  1.50it/s]

buffer size = 12966, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6284/2000001 [1:02:16<362:53:30,  1.53it/s]

buffer size = 12968, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6285/2000001 [1:02:17<357:14:38,  1.55it/s]

buffer size = 12970, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6286/2000001 [1:02:17<359:11:16,  1.54it/s]

buffer size = 12972, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6287/2000001 [1:02:18<352:42:54,  1.57it/s]

buffer size = 12974, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6288/2000001 [1:02:19<352:07:17,  1.57it/s]

buffer size = 12976, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6289/2000001 [1:02:19<351:37:57,  1.57it/s]

buffer size = 12978, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6290/2000001 [1:02:20<351:58:38,  1.57it/s]

buffer size = 12980, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6291/2000001 [1:02:21<353:45:53,  1.57it/s]

buffer size = 12982, epsilon = 0.09686
mean_reward :  0.0


  0%|          | 6292/2000001 [1:02:21<356:40:15,  1.55it/s]

buffer size = 12984, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6293/2000001 [1:02:22<351:38:41,  1.57it/s]

buffer size = 12986, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6294/2000001 [1:02:23<386:11:52,  1.43it/s]

buffer size = 12988, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6295/2000001 [1:02:24<404:32:20,  1.37it/s]

buffer size = 12990, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6296/2000001 [1:02:24<412:17:02,  1.34it/s]

buffer size = 12992, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6297/2000001 [1:02:25<433:15:56,  1.28it/s]

buffer size = 12994, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6298/2000001 [1:02:26<443:44:09,  1.25it/s]

buffer size = 12996, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6299/2000001 [1:02:27<418:53:03,  1.32it/s]

buffer size = 12998, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6300/2000001 [1:02:27<398:25:35,  1.39it/s]

buffer size = 13000, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6301/2000001 [1:02:28<386:18:42,  1.43it/s]

buffer size = 13002, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6302/2000001 [1:02:29<376:07:08,  1.47it/s]

buffer size = 13004, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6303/2000001 [1:02:29<369:19:29,  1.50it/s]

buffer size = 13006, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6304/2000001 [1:02:30<360:38:41,  1.54it/s]

buffer size = 13008, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6305/2000001 [1:02:30<354:56:32,  1.56it/s]

buffer size = 13010, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6306/2000001 [1:02:31<355:15:13,  1.56it/s]

buffer size = 13012, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6307/2000001 [1:02:32<352:08:25,  1.57it/s]

buffer size = 13014, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6308/2000001 [1:02:32<352:36:16,  1.57it/s]

buffer size = 13016, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6309/2000001 [1:02:33<352:28:03,  1.57it/s]

buffer size = 13018, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6310/2000001 [1:02:34<345:40:10,  1.60it/s]

buffer size = 13020, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6311/2000001 [1:02:34<347:45:19,  1.59it/s]

buffer size = 13022, epsilon = 0.09685
mean_reward :  0.0


  0%|          | 6312/2000001 [1:02:35<349:21:18,  1.59it/s]

buffer size = 13024, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6313/2000001 [1:02:35<349:59:28,  1.58it/s]

buffer size = 13026, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6314/2000001 [1:02:36<379:44:03,  1.46it/s]

buffer size = 13028, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6315/2000001 [1:02:37<410:48:02,  1.35it/s]

buffer size = 13030, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6316/2000001 [1:02:38<426:21:29,  1.30it/s]

buffer size = 13032, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6317/2000001 [1:02:39<452:42:59,  1.22it/s]

buffer size = 13034, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6318/2000001 [1:02:40<428:34:45,  1.29it/s]

buffer size = 13036, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6319/2000001 [1:02:40<399:10:22,  1.39it/s]

buffer size = 13038, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6320/2000001 [1:02:41<389:46:01,  1.42it/s]

buffer size = 13040, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6321/2000001 [1:02:42<379:23:56,  1.46it/s]

buffer size = 13042, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6322/2000001 [1:02:42<369:02:53,  1.50it/s]

buffer size = 13044, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6323/2000001 [1:02:43<366:49:06,  1.51it/s]

buffer size = 13046, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6324/2000001 [1:02:43<361:08:22,  1.53it/s]

buffer size = 13048, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6325/2000001 [1:02:44<359:41:40,  1.54it/s]

buffer size = 13050, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6326/2000001 [1:02:45<356:58:12,  1.55it/s]

buffer size = 13052, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6327/2000001 [1:02:45<354:49:34,  1.56it/s]

buffer size = 13054, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6328/2000001 [1:02:46<354:09:20,  1.56it/s]

buffer size = 13056, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6329/2000001 [1:02:47<353:58:30,  1.56it/s]

buffer size = 13058, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6330/2000001 [1:02:47<350:01:01,  1.58it/s]

buffer size = 13060, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6331/2000001 [1:02:48<356:32:10,  1.55it/s]

buffer size = 13062, epsilon = 0.09684
mean_reward :  0.0


  0%|          | 6332/2000001 [1:02:49<352:36:05,  1.57it/s]

buffer size = 13064, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6333/2000001 [1:02:49<372:15:31,  1.49it/s]

buffer size = 13066, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6334/2000001 [1:02:50<396:17:37,  1.40it/s]

buffer size = 13068, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6335/2000001 [1:02:51<409:13:55,  1.35it/s]

buffer size = 13070, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6336/2000001 [1:02:52<421:58:06,  1.31it/s]

buffer size = 13072, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6337/2000001 [1:02:53<440:08:41,  1.26it/s]

buffer size = 13074, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6338/2000001 [1:02:53<423:07:09,  1.31it/s]

buffer size = 13076, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6339/2000001 [1:02:54<403:11:31,  1.37it/s]

buffer size = 13078, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6340/2000001 [1:02:55<388:03:45,  1.43it/s]

buffer size = 13080, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6341/2000001 [1:02:55<377:06:29,  1.47it/s]

buffer size = 13082, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6342/2000001 [1:02:56<369:22:46,  1.50it/s]

buffer size = 13084, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6343/2000001 [1:02:56<365:55:11,  1.51it/s]

buffer size = 13086, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6344/2000001 [1:02:57<358:24:05,  1.55it/s]

buffer size = 13088, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6345/2000001 [1:02:58<353:22:12,  1.57it/s]

buffer size = 13090, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6346/2000001 [1:02:58<354:28:12,  1.56it/s]

buffer size = 13092, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6347/2000001 [1:02:59<355:04:46,  1.56it/s]

buffer size = 13094, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6348/2000001 [1:03:00<355:38:45,  1.56it/s]

buffer size = 13096, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6349/2000001 [1:03:00<352:56:09,  1.57it/s]

buffer size = 13098, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6350/2000001 [1:03:01<354:37:03,  1.56it/s]

buffer size = 13100, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6351/2000001 [1:03:02<354:09:48,  1.56it/s]

buffer size = 13102, epsilon = 0.09683
mean_reward :  0.0


  0%|          | 6352/2000001 [1:03:02<350:47:06,  1.58it/s]

buffer size = 13104, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6353/2000001 [1:03:03<351:40:19,  1.57it/s]

buffer size = 13106, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6354/2000001 [1:03:04<400:55:48,  1.38it/s]

buffer size = 13108, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6355/2000001 [1:03:05<432:05:07,  1.28it/s]

buffer size = 13110, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6356/2000001 [1:03:06<461:45:11,  1.20it/s]

buffer size = 13112, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6357/2000001 [1:03:06<435:56:38,  1.27it/s]

buffer size = 13114, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6358/2000001 [1:03:07<413:54:06,  1.34it/s]

buffer size = 13116, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6359/2000001 [1:03:08<391:20:03,  1.42it/s]

buffer size = 13118, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6360/2000001 [1:03:08<380:57:15,  1.45it/s]

buffer size = 13120, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6361/2000001 [1:03:09<373:33:47,  1.48it/s]

buffer size = 13122, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6362/2000001 [1:03:09<366:06:16,  1.51it/s]

buffer size = 13124, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6363/2000001 [1:03:10<364:16:34,  1.52it/s]

buffer size = 13126, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6364/2000001 [1:03:11<357:55:38,  1.55it/s]

buffer size = 13128, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6365/2000001 [1:03:11<357:03:42,  1.55it/s]

buffer size = 13130, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6366/2000001 [1:03:12<354:21:55,  1.56it/s]

buffer size = 13132, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6367/2000001 [1:03:13<351:54:35,  1.57it/s]

buffer size = 13134, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6368/2000001 [1:03:13<351:32:24,  1.58it/s]

buffer size = 13136, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6369/2000001 [1:03:14<348:42:08,  1.59it/s]

buffer size = 13138, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6370/2000001 [1:03:15<350:12:16,  1.58it/s]

buffer size = 13140, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6371/2000001 [1:03:15<348:12:01,  1.59it/s]

buffer size = 13142, epsilon = 0.09682
mean_reward :  0.0


  0%|          | 6372/2000001 [1:03:16<373:29:50,  1.48it/s]

buffer size = 13144, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6373/2000001 [1:03:17<415:10:27,  1.33it/s]

buffer size = 13146, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6374/2000001 [1:03:18<430:57:46,  1.28it/s]

buffer size = 13148, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6375/2000001 [1:03:19<444:00:52,  1.25it/s]

buffer size = 13150, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6376/2000001 [1:03:19<425:26:11,  1.30it/s]

buffer size = 13152, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6377/2000001 [1:03:20<404:41:17,  1.37it/s]

buffer size = 13154, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6378/2000001 [1:03:20<386:51:49,  1.43it/s]

buffer size = 13156, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6379/2000001 [1:03:21<376:12:33,  1.47it/s]

buffer size = 13158, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6380/2000001 [1:03:22<373:40:19,  1.48it/s]

buffer size = 13160, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6381/2000001 [1:03:22<363:22:51,  1.52it/s]

buffer size = 13162, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6382/2000001 [1:03:23<359:41:26,  1.54it/s]

buffer size = 13164, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6383/2000001 [1:03:24<355:07:24,  1.56it/s]

buffer size = 13166, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6384/2000001 [1:03:24<355:18:17,  1.56it/s]

buffer size = 13168, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6385/2000001 [1:03:25<352:20:04,  1.57it/s]

buffer size = 13170, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6386/2000001 [1:03:26<355:16:24,  1.56it/s]

buffer size = 13172, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6387/2000001 [1:03:26<355:25:22,  1.56it/s]

buffer size = 13174, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6388/2000001 [1:03:27<355:06:46,  1.56it/s]

buffer size = 13176, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6389/2000001 [1:03:28<356:02:36,  1.56it/s]

buffer size = 13178, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6390/2000001 [1:03:28<355:31:19,  1.56it/s]

buffer size = 13180, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6391/2000001 [1:03:29<362:32:59,  1.53it/s]

buffer size = 13182, epsilon = 0.09681
mean_reward :  0.0


  0%|          | 6392/2000001 [1:03:30<392:52:05,  1.41it/s]

buffer size = 13184, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6393/2000001 [1:03:30<412:32:59,  1.34it/s]

buffer size = 13186, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6394/2000001 [1:03:31<425:36:00,  1.30it/s]

buffer size = 13188, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6395/2000001 [1:03:32<447:38:28,  1.24it/s]

buffer size = 13190, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6396/2000001 [1:03:33<440:24:07,  1.26it/s]

buffer size = 13192, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6397/2000001 [1:03:34<411:29:44,  1.35it/s]

buffer size = 13194, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6398/2000001 [1:03:34<397:09:50,  1.39it/s]

buffer size = 13196, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6399/2000001 [1:03:35<376:52:04,  1.47it/s]

buffer size = 13198, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6400/2000001 [1:03:35<369:30:31,  1.50it/s]

buffer size = 13200, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6401/2000001 [1:03:36<367:46:33,  1.51it/s]

buffer size = 13202, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6402/2000001 [1:03:37<365:03:00,  1.52it/s]

buffer size = 13204, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6403/2000001 [1:03:37<363:29:23,  1.52it/s]

buffer size = 13206, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6404/2000001 [1:03:38<360:37:47,  1.54it/s]

buffer size = 13208, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6405/2000001 [1:03:39<359:26:06,  1.54it/s]

buffer size = 13210, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6406/2000001 [1:03:39<359:29:06,  1.54it/s]

buffer size = 13212, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6407/2000001 [1:03:40<357:05:53,  1.55it/s]

buffer size = 13214, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6408/2000001 [1:03:41<356:31:20,  1.55it/s]

buffer size = 13216, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6409/2000001 [1:03:41<358:16:47,  1.55it/s]

buffer size = 13218, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6410/2000001 [1:03:42<355:59:55,  1.56it/s]

buffer size = 13220, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6411/2000001 [1:03:43<355:53:46,  1.56it/s]

buffer size = 13222, epsilon = 0.09680
mean_reward :  0.0


  0%|          | 6412/2000001 [1:03:43<384:21:52,  1.44it/s]

buffer size = 13224, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6413/2000001 [1:03:44<398:33:06,  1.39it/s]

buffer size = 13226, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6414/2000001 [1:03:45<414:27:55,  1.34it/s]

buffer size = 13228, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6415/2000001 [1:03:46<437:32:15,  1.27it/s]

buffer size = 13230, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6416/2000001 [1:03:47<447:42:46,  1.24it/s]

buffer size = 13232, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6417/2000001 [1:03:47<416:06:54,  1.33it/s]

buffer size = 13234, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6418/2000001 [1:03:48<399:43:14,  1.39it/s]

buffer size = 13236, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6419/2000001 [1:03:49<382:10:44,  1.45it/s]

buffer size = 13238, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6420/2000001 [1:03:49<378:20:54,  1.46it/s]

buffer size = 13240, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6421/2000001 [1:03:50<372:03:25,  1.49it/s]

buffer size = 13242, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6422/2000001 [1:03:51<361:26:07,  1.53it/s]

buffer size = 13244, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6423/2000001 [1:03:51<361:08:36,  1.53it/s]

buffer size = 13246, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6424/2000001 [1:03:52<361:06:36,  1.53it/s]

buffer size = 13248, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6425/2000001 [1:03:52<360:48:36,  1.53it/s]

buffer size = 13250, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6426/2000001 [1:03:53<359:10:48,  1.54it/s]

buffer size = 13252, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6427/2000001 [1:03:54<355:25:05,  1.56it/s]

buffer size = 13254, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6428/2000001 [1:03:54<359:32:52,  1.54it/s]

buffer size = 13256, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6429/2000001 [1:03:55<355:31:40,  1.56it/s]

buffer size = 13258, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6430/2000001 [1:03:56<356:10:07,  1.55it/s]

buffer size = 13260, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6431/2000001 [1:03:56<353:59:12,  1.56it/s]

buffer size = 13262, epsilon = 0.09679
mean_reward :  0.0


  0%|          | 6432/2000001 [1:03:57<382:48:23,  1.45it/s]

buffer size = 13264, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6433/2000001 [1:03:58<398:24:16,  1.39it/s]

buffer size = 13266, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6434/2000001 [1:03:59<412:05:43,  1.34it/s]

buffer size = 13268, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6435/2000001 [1:04:00<436:00:47,  1.27it/s]

buffer size = 13270, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6436/2000001 [1:04:00<442:38:11,  1.25it/s]

buffer size = 13272, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6437/2000001 [1:04:01<430:53:33,  1.29it/s]

buffer size = 13274, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6438/2000001 [1:04:02<409:05:57,  1.35it/s]

buffer size = 13276, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6439/2000001 [1:04:02<392:10:39,  1.41it/s]

buffer size = 13278, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6440/2000001 [1:04:03<383:17:46,  1.44it/s]

buffer size = 13280, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6441/2000001 [1:04:04<376:39:55,  1.47it/s]

buffer size = 13282, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6442/2000001 [1:04:04<368:21:03,  1.50it/s]

buffer size = 13284, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6443/2000001 [1:04:05<367:30:59,  1.51it/s]

buffer size = 13286, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6444/2000001 [1:04:06<363:47:13,  1.52it/s]

buffer size = 13288, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6445/2000001 [1:04:06<359:53:25,  1.54it/s]

buffer size = 13290, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6446/2000001 [1:04:07<361:18:57,  1.53it/s]

buffer size = 13292, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6447/2000001 [1:04:08<358:51:42,  1.54it/s]

buffer size = 13294, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6448/2000001 [1:04:08<356:53:32,  1.55it/s]

buffer size = 13296, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6449/2000001 [1:04:09<357:10:26,  1.55it/s]

buffer size = 13298, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6450/2000001 [1:04:10<359:34:58,  1.54it/s]

buffer size = 13300, epsilon = 0.09678
mean_reward :  0.0


  0%|          | 6451/2000001 [1:04:10<367:00:07,  1.51it/s]

buffer size = 13302, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6452/2000001 [1:04:11<399:41:44,  1.39it/s]

buffer size = 13304, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6453/2000001 [1:04:12<432:25:33,  1.28it/s]

buffer size = 13306, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6454/2000001 [1:04:13<458:59:09,  1.21it/s]

buffer size = 13308, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6455/2000001 [1:04:14<447:31:55,  1.24it/s]

buffer size = 13310, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6456/2000001 [1:04:14<416:35:47,  1.33it/s]

buffer size = 13312, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6457/2000001 [1:04:15<400:49:46,  1.38it/s]

buffer size = 13314, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6458/2000001 [1:04:16<387:46:13,  1.43it/s]

buffer size = 13316, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6459/2000001 [1:04:16<374:02:56,  1.48it/s]

buffer size = 13318, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6460/2000001 [1:04:17<368:24:41,  1.50it/s]

buffer size = 13320, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6461/2000001 [1:04:18<365:40:21,  1.51it/s]

buffer size = 13322, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6462/2000001 [1:04:18<360:57:03,  1.53it/s]

buffer size = 13324, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6463/2000001 [1:04:19<362:23:42,  1.53it/s]

buffer size = 13326, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6464/2000001 [1:04:20<361:36:37,  1.53it/s]

buffer size = 13328, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6465/2000001 [1:04:20<358:52:12,  1.54it/s]

buffer size = 13330, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6466/2000001 [1:04:21<358:17:14,  1.55it/s]

buffer size = 13332, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6467/2000001 [1:04:21<353:23:24,  1.57it/s]

buffer size = 13334, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6468/2000001 [1:04:22<357:05:53,  1.55it/s]

buffer size = 13336, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6469/2000001 [1:04:23<357:33:01,  1.55it/s]

buffer size = 13338, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6470/2000001 [1:04:23<359:24:02,  1.54it/s]

buffer size = 13340, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6471/2000001 [1:04:24<399:22:31,  1.39it/s]

buffer size = 13342, epsilon = 0.09677
mean_reward :  0.0


  0%|          | 6472/2000001 [1:04:25<408:36:25,  1.36it/s]

buffer size = 13344, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6473/2000001 [1:04:26<417:28:56,  1.33it/s]

buffer size = 13346, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6474/2000001 [1:04:27<448:20:13,  1.24it/s]

buffer size = 13348, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6475/2000001 [1:04:28<443:11:18,  1.25it/s]

buffer size = 13350, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6476/2000001 [1:04:28<414:16:09,  1.34it/s]

buffer size = 13352, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6477/2000001 [1:04:29<397:12:32,  1.39it/s]

buffer size = 13354, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6478/2000001 [1:04:29<387:47:50,  1.43it/s]

buffer size = 13356, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6479/2000001 [1:04:30<380:36:42,  1.45it/s]

buffer size = 13358, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6480/2000001 [1:04:31<372:58:05,  1.48it/s]

buffer size = 13360, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6481/2000001 [1:04:31<368:36:48,  1.50it/s]

buffer size = 13362, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6482/2000001 [1:04:32<364:08:08,  1.52it/s]

buffer size = 13364, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6483/2000001 [1:04:33<364:11:30,  1.52it/s]

buffer size = 13366, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6484/2000001 [1:04:33<363:22:55,  1.52it/s]

buffer size = 13368, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6485/2000001 [1:04:34<360:48:09,  1.53it/s]

buffer size = 13370, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6486/2000001 [1:04:35<359:35:59,  1.54it/s]

buffer size = 13372, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6487/2000001 [1:04:35<356:11:57,  1.55it/s]

buffer size = 13374, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6488/2000001 [1:04:36<353:19:45,  1.57it/s]

buffer size = 13376, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6489/2000001 [1:04:37<353:52:12,  1.56it/s]

buffer size = 13378, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6490/2000001 [1:04:37<379:23:07,  1.46it/s]

buffer size = 13380, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6491/2000001 [1:04:38<410:37:23,  1.35it/s]

buffer size = 13382, epsilon = 0.09676
mean_reward :  0.0


  0%|          | 6492/2000001 [1:04:39<429:18:49,  1.29it/s]

buffer size = 13384, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6493/2000001 [1:04:40<437:23:00,  1.27it/s]

buffer size = 13386, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6494/2000001 [1:04:41<438:36:25,  1.26it/s]

buffer size = 13388, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6495/2000001 [1:04:41<417:09:39,  1.33it/s]

buffer size = 13390, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6496/2000001 [1:04:42<397:26:28,  1.39it/s]

buffer size = 13392, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6497/2000001 [1:04:43<384:21:59,  1.44it/s]

buffer size = 13394, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6498/2000001 [1:04:43<376:24:28,  1.47it/s]

buffer size = 13396, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6499/2000001 [1:04:44<384:21:02,  1.44it/s]

buffer size = 13398, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6500/2000001 [1:04:45<381:22:19,  1.45it/s]

buffer size = 13400, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6501/2000001 [1:04:45<377:10:24,  1.47it/s]

buffer size = 13402, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6502/2000001 [1:04:46<369:56:49,  1.50it/s]

buffer size = 13404, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6503/2000001 [1:04:47<364:59:36,  1.52it/s]

buffer size = 13406, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6504/2000001 [1:04:47<362:49:33,  1.53it/s]

buffer size = 13408, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6505/2000001 [1:04:48<356:21:01,  1.55it/s]

buffer size = 13410, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6506/2000001 [1:04:49<356:40:23,  1.55it/s]

buffer size = 13412, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6507/2000001 [1:04:49<359:49:00,  1.54it/s]

buffer size = 13414, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6508/2000001 [1:04:50<360:50:23,  1.53it/s]

buffer size = 13416, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6509/2000001 [1:04:51<380:10:41,  1.46it/s]

buffer size = 13418, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6510/2000001 [1:04:51<411:05:57,  1.35it/s]

buffer size = 13420, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6511/2000001 [1:04:52<429:27:21,  1.29it/s]

buffer size = 13422, epsilon = 0.09675
mean_reward :  0.0


  0%|          | 6512/2000001 [1:04:53<439:02:44,  1.26it/s]

buffer size = 13424, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6513/2000001 [1:04:54<445:22:41,  1.24it/s]

buffer size = 13426, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6514/2000001 [1:04:55<422:45:39,  1.31it/s]

buffer size = 13428, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6515/2000001 [1:04:55<402:59:53,  1.37it/s]

buffer size = 13430, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6516/2000001 [1:04:56<388:55:32,  1.42it/s]

buffer size = 13432, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6517/2000001 [1:04:57<374:53:11,  1.48it/s]

buffer size = 13434, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6518/2000001 [1:04:57<372:43:40,  1.49it/s]

buffer size = 13436, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6519/2000001 [1:04:58<370:11:17,  1.50it/s]

buffer size = 13438, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6520/2000001 [1:04:59<366:20:19,  1.51it/s]

buffer size = 13440, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6521/2000001 [1:04:59<363:37:26,  1.52it/s]

buffer size = 13442, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6522/2000001 [1:05:00<358:39:24,  1.54it/s]

buffer size = 13444, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6523/2000001 [1:05:00<357:19:00,  1.55it/s]

buffer size = 13446, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6524/2000001 [1:05:01<354:19:42,  1.56it/s]

buffer size = 13448, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6525/2000001 [1:05:02<354:44:52,  1.56it/s]

buffer size = 13450, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6526/2000001 [1:05:02<358:38:03,  1.54it/s]

buffer size = 13452, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6527/2000001 [1:05:03<359:53:50,  1.54it/s]

buffer size = 13454, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6528/2000001 [1:05:04<354:25:45,  1.56it/s]

buffer size = 13456, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6529/2000001 [1:05:05<388:21:54,  1.43it/s]

buffer size = 13458, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6530/2000001 [1:05:05<402:12:26,  1.38it/s]

buffer size = 13460, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6531/2000001 [1:05:06<410:31:14,  1.35it/s]

buffer size = 13462, epsilon = 0.09674
mean_reward :  0.0


  0%|          | 6532/2000001 [1:05:07<426:51:20,  1.30it/s]

buffer size = 13464, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6533/2000001 [1:05:08<441:12:20,  1.26it/s]

buffer size = 13466, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6534/2000001 [1:05:09<431:39:13,  1.28it/s]

buffer size = 13468, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6535/2000001 [1:05:09<408:08:11,  1.36it/s]

buffer size = 13470, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6536/2000001 [1:05:10<392:13:32,  1.41it/s]

buffer size = 13472, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6537/2000001 [1:05:10<383:08:58,  1.45it/s]

buffer size = 13474, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6538/2000001 [1:05:11<370:35:45,  1.49it/s]

buffer size = 13476, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6539/2000001 [1:05:12<369:58:58,  1.50it/s]

buffer size = 13478, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6540/2000001 [1:05:12<364:52:04,  1.52it/s]

buffer size = 13480, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6541/2000001 [1:05:13<361:46:53,  1.53it/s]

buffer size = 13482, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6542/2000001 [1:05:14<358:56:25,  1.54it/s]

buffer size = 13484, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6543/2000001 [1:05:14<358:13:40,  1.55it/s]

buffer size = 13486, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6544/2000001 [1:05:15<358:12:07,  1.55it/s]

buffer size = 13488, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6545/2000001 [1:05:16<358:48:50,  1.54it/s]

buffer size = 13490, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6546/2000001 [1:05:16<354:03:22,  1.56it/s]

buffer size = 13492, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6547/2000001 [1:05:17<355:11:17,  1.56it/s]

buffer size = 13494, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6548/2000001 [1:05:17<356:45:38,  1.55it/s]

buffer size = 13496, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6549/2000001 [1:05:18<383:51:18,  1.44it/s]

buffer size = 13498, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6550/2000001 [1:05:19<419:53:07,  1.32it/s]

buffer size = 13500, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6551/2000001 [1:05:20<446:21:10,  1.24it/s]

buffer size = 13502, epsilon = 0.09673
mean_reward :  0.0


  0%|          | 6552/2000001 [1:05:21<461:04:47,  1.20it/s]

buffer size = 13504, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6553/2000001 [1:05:22<427:11:54,  1.30it/s]

buffer size = 13506, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6554/2000001 [1:05:22<408:57:18,  1.35it/s]

buffer size = 13508, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6555/2000001 [1:05:23<391:38:56,  1.41it/s]

buffer size = 13510, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6556/2000001 [1:05:24<379:39:59,  1.46it/s]

buffer size = 13512, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6557/2000001 [1:05:24<375:34:41,  1.47it/s]

buffer size = 13514, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6558/2000001 [1:05:25<365:53:47,  1.51it/s]

buffer size = 13516, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6559/2000001 [1:05:26<364:11:17,  1.52it/s]

buffer size = 13518, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6560/2000001 [1:05:26<363:34:53,  1.52it/s]

buffer size = 13520, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6561/2000001 [1:05:27<356:37:33,  1.55it/s]

buffer size = 13522, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6562/2000001 [1:05:27<363:23:36,  1.52it/s]

buffer size = 13524, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6563/2000001 [1:05:28<358:55:47,  1.54it/s]

buffer size = 13526, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6564/2000001 [1:05:29<357:21:11,  1.55it/s]

buffer size = 13528, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6565/2000001 [1:05:29<355:56:21,  1.56it/s]

buffer size = 13530, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6566/2000001 [1:05:30<355:15:32,  1.56it/s]

buffer size = 13532, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6567/2000001 [1:05:31<354:22:05,  1.56it/s]

buffer size = 13534, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6568/2000001 [1:05:31<373:29:49,  1.48it/s]

buffer size = 13536, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6569/2000001 [1:05:32<390:58:12,  1.42it/s]

buffer size = 13538, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6570/2000001 [1:05:33<400:59:26,  1.38it/s]

buffer size = 13540, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6571/2000001 [1:05:34<423:07:17,  1.31it/s]

buffer size = 13542, epsilon = 0.09672
mean_reward :  0.0


  0%|          | 6572/2000001 [1:05:35<441:19:06,  1.25it/s]

buffer size = 13544, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6573/2000001 [1:05:35<432:37:28,  1.28it/s]

buffer size = 13546, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6574/2000001 [1:05:36<411:10:54,  1.35it/s]

buffer size = 13548, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6575/2000001 [1:05:37<397:50:18,  1.39it/s]

buffer size = 13550, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6576/2000001 [1:05:37<382:05:55,  1.45it/s]

buffer size = 13552, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6577/2000001 [1:05:38<377:47:42,  1.47it/s]

buffer size = 13554, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6578/2000001 [1:05:39<371:04:47,  1.49it/s]

buffer size = 13556, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6579/2000001 [1:05:39<366:02:26,  1.51it/s]

buffer size = 13558, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6580/2000001 [1:05:40<363:17:30,  1.52it/s]

buffer size = 13560, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6581/2000001 [1:05:41<358:23:14,  1.55it/s]

buffer size = 13562, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6582/2000001 [1:05:41<360:02:17,  1.54it/s]

buffer size = 13564, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6583/2000001 [1:05:42<357:33:34,  1.55it/s]

buffer size = 13566, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6584/2000001 [1:05:43<360:21:56,  1.54it/s]

buffer size = 13568, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6585/2000001 [1:05:43<361:12:06,  1.53it/s]

buffer size = 13570, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6586/2000001 [1:05:44<356:10:14,  1.55it/s]

buffer size = 13572, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6587/2000001 [1:05:44<357:17:38,  1.55it/s]

buffer size = 13574, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6588/2000001 [1:05:45<390:31:01,  1.42it/s]

buffer size = 13576, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6589/2000001 [1:05:46<408:29:33,  1.36it/s]

buffer size = 13578, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6590/2000001 [1:05:47<427:55:49,  1.29it/s]

buffer size = 13580, epsilon = 0.09671
mean_reward :  0.0


  0%|          | 6591/2000001 [1:05:48<442:12:22,  1.25it/s]

buffer size = 13582, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6592/2000001 [1:05:49<445:45:38,  1.24it/s]

buffer size = 13584, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6593/2000001 [1:05:49<422:26:27,  1.31it/s]

buffer size = 13586, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6594/2000001 [1:05:50<401:41:07,  1.38it/s]

buffer size = 13588, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6595/2000001 [1:05:51<388:34:24,  1.43it/s]

buffer size = 13590, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6596/2000001 [1:05:51<379:28:00,  1.46it/s]

buffer size = 13592, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6597/2000001 [1:05:52<370:55:43,  1.49it/s]

buffer size = 13594, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6598/2000001 [1:05:53<365:20:00,  1.52it/s]

buffer size = 13596, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6599/2000001 [1:05:53<360:45:33,  1.53it/s]

buffer size = 13598, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6600/2000001 [1:05:54<361:12:59,  1.53it/s]

buffer size = 13600, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6601/2000001 [1:05:54<360:24:00,  1.54it/s]

buffer size = 13602, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6602/2000001 [1:05:55<360:53:22,  1.53it/s]

buffer size = 13604, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6603/2000001 [1:05:56<361:43:18,  1.53it/s]

buffer size = 13606, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6604/2000001 [1:05:56<356:15:47,  1.55it/s]

buffer size = 13608, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6605/2000001 [1:05:57<357:56:27,  1.55it/s]

buffer size = 13610, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6606/2000001 [1:05:58<360:21:00,  1.54it/s]

buffer size = 13612, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6607/2000001 [1:05:59<384:57:09,  1.44it/s]

buffer size = 13614, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6608/2000001 [1:05:59<419:26:52,  1.32it/s]

buffer size = 13616, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6609/2000001 [1:06:00<433:16:21,  1.28it/s]

buffer size = 13618, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6610/2000001 [1:06:01<451:05:23,  1.23it/s]

buffer size = 13620, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6611/2000001 [1:06:02<443:33:04,  1.25it/s]

buffer size = 13622, epsilon = 0.09670
mean_reward :  0.0


  0%|          | 6612/2000001 [1:06:03<417:15:17,  1.33it/s]

buffer size = 13624, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6613/2000001 [1:06:03<404:37:27,  1.37it/s]

buffer size = 13626, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6614/2000001 [1:06:04<387:53:26,  1.43it/s]

buffer size = 13628, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6615/2000001 [1:06:05<384:13:45,  1.44it/s]

buffer size = 13630, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6616/2000001 [1:06:05<377:35:37,  1.47it/s]

buffer size = 13632, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6617/2000001 [1:06:06<369:08:53,  1.50it/s]

buffer size = 13634, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6618/2000001 [1:06:07<372:13:56,  1.49it/s]

buffer size = 13636, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6619/2000001 [1:06:07<367:58:19,  1.50it/s]

buffer size = 13638, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6620/2000001 [1:06:08<364:05:21,  1.52it/s]

buffer size = 13640, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6621/2000001 [1:06:08<365:00:37,  1.52it/s]

buffer size = 13642, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6622/2000001 [1:06:09<363:40:16,  1.52it/s]

buffer size = 13644, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6623/2000001 [1:06:10<365:38:08,  1.51it/s]

buffer size = 13646, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6624/2000001 [1:06:10<358:05:21,  1.55it/s]

buffer size = 13648, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6625/2000001 [1:06:11<361:32:23,  1.53it/s]

buffer size = 13650, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6626/2000001 [1:06:12<388:57:59,  1.42it/s]

buffer size = 13652, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6627/2000001 [1:06:13<421:49:47,  1.31it/s]

buffer size = 13654, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6628/2000001 [1:06:14<446:57:27,  1.24it/s]

buffer size = 13656, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6629/2000001 [1:06:15<456:06:40,  1.21it/s]

buffer size = 13658, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6630/2000001 [1:06:15<436:39:32,  1.27it/s]

buffer size = 13660, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6631/2000001 [1:06:16<411:18:23,  1.35it/s]

buffer size = 13662, epsilon = 0.09669
mean_reward :  0.0


  0%|          | 6632/2000001 [1:06:17<396:16:31,  1.40it/s]

buffer size = 13664, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6633/2000001 [1:06:17<387:42:35,  1.43it/s]

buffer size = 13666, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6634/2000001 [1:06:18<375:39:47,  1.47it/s]

buffer size = 13668, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6635/2000001 [1:06:18<369:41:12,  1.50it/s]

buffer size = 13670, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6636/2000001 [1:06:19<367:59:59,  1.50it/s]

buffer size = 13672, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6637/2000001 [1:06:20<368:13:19,  1.50it/s]

buffer size = 13674, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6638/2000001 [1:06:20<365:35:44,  1.51it/s]

buffer size = 13676, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6639/2000001 [1:06:21<367:01:06,  1.51it/s]

buffer size = 13678, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6640/2000001 [1:06:22<361:42:05,  1.53it/s]

buffer size = 13680, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6641/2000001 [1:06:22<363:34:52,  1.52it/s]

buffer size = 13682, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6642/2000001 [1:06:23<360:17:39,  1.54it/s]

buffer size = 13684, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6643/2000001 [1:06:24<363:01:40,  1.53it/s]

buffer size = 13686, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6644/2000001 [1:06:24<363:26:58,  1.52it/s]

buffer size = 13688, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6645/2000001 [1:06:25<388:05:10,  1.43it/s]

buffer size = 13690, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6646/2000001 [1:06:26<399:28:15,  1.39it/s]

buffer size = 13692, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6647/2000001 [1:06:27<407:57:36,  1.36it/s]

buffer size = 13694, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6648/2000001 [1:06:28<427:07:52,  1.30it/s]

buffer size = 13696, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6649/2000001 [1:06:28<444:11:48,  1.25it/s]

buffer size = 13698, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6650/2000001 [1:06:29<435:42:23,  1.27it/s]

buffer size = 13700, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6651/2000001 [1:06:30<411:07:07,  1.35it/s]

buffer size = 13702, epsilon = 0.09668
mean_reward :  0.0


  0%|          | 6652/2000001 [1:06:30<394:51:47,  1.40it/s]

buffer size = 13704, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6653/2000001 [1:06:31<386:10:27,  1.43it/s]

buffer size = 13706, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6654/2000001 [1:06:32<378:24:04,  1.46it/s]

buffer size = 13708, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6655/2000001 [1:06:32<368:10:40,  1.50it/s]

buffer size = 13710, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6656/2000001 [1:06:33<368:02:09,  1.50it/s]

buffer size = 13712, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6657/2000001 [1:06:34<361:51:39,  1.53it/s]

buffer size = 13714, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6658/2000001 [1:06:34<368:15:45,  1.50it/s]

buffer size = 13716, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6659/2000001 [1:06:35<363:20:26,  1.52it/s]

buffer size = 13718, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6660/2000001 [1:06:36<356:32:11,  1.55it/s]

buffer size = 13720, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6661/2000001 [1:06:36<361:14:44,  1.53it/s]

buffer size = 13722, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6662/2000001 [1:06:37<357:54:35,  1.55it/s]

buffer size = 13724, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6663/2000001 [1:06:38<360:51:32,  1.53it/s]

buffer size = 13726, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6664/2000001 [1:06:38<363:12:26,  1.52it/s]

buffer size = 13728, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6665/2000001 [1:06:39<390:18:08,  1.42it/s]

buffer size = 13730, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6666/2000001 [1:06:40<408:51:55,  1.35it/s]

buffer size = 13732, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6667/2000001 [1:06:41<421:23:38,  1.31it/s]

buffer size = 13734, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6668/2000001 [1:06:42<444:05:47,  1.25it/s]

buffer size = 13736, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6669/2000001 [1:06:42<445:20:42,  1.24it/s]

buffer size = 13738, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6670/2000001 [1:06:43<419:38:12,  1.32it/s]

buffer size = 13740, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6671/2000001 [1:06:44<404:17:50,  1.37it/s]

buffer size = 13742, epsilon = 0.09667
mean_reward :  0.0


  0%|          | 6672/2000001 [1:06:44<392:19:10,  1.41it/s]

buffer size = 13744, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6673/2000001 [1:06:45<380:31:37,  1.46it/s]

buffer size = 13746, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6674/2000001 [1:06:46<375:37:53,  1.47it/s]

buffer size = 13748, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6675/2000001 [1:06:46<367:21:34,  1.51it/s]

buffer size = 13750, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6676/2000001 [1:06:47<361:17:41,  1.53it/s]

buffer size = 13752, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6677/2000001 [1:06:48<361:51:53,  1.53it/s]

buffer size = 13754, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6678/2000001 [1:06:48<359:42:47,  1.54it/s]

buffer size = 13756, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6679/2000001 [1:06:49<360:14:19,  1.54it/s]

buffer size = 13758, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6680/2000001 [1:06:50<358:09:43,  1.55it/s]

buffer size = 13760, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6681/2000001 [1:06:50<360:42:24,  1.54it/s]

buffer size = 13762, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6682/2000001 [1:06:51<364:05:02,  1.52it/s]

buffer size = 13764, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6683/2000001 [1:06:52<364:52:37,  1.52it/s]

buffer size = 13766, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6684/2000001 [1:06:52<379:46:10,  1.46it/s]

buffer size = 13768, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6685/2000001 [1:06:53<405:56:20,  1.36it/s]

buffer size = 13770, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6686/2000001 [1:06:54<420:52:51,  1.32it/s]

buffer size = 13772, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6687/2000001 [1:06:55<439:32:52,  1.26it/s]

buffer size = 13774, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6688/2000001 [1:06:56<444:11:44,  1.25it/s]

buffer size = 13776, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6689/2000001 [1:06:56<435:46:37,  1.27it/s]

buffer size = 13778, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6690/2000001 [1:06:57<410:49:43,  1.35it/s]

buffer size = 13780, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6691/2000001 [1:06:58<399:35:24,  1.39it/s]

buffer size = 13782, epsilon = 0.09666
mean_reward :  0.0


  0%|          | 6692/2000001 [1:06:58<389:13:45,  1.42it/s]

buffer size = 13784, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6693/2000001 [1:06:59<381:30:54,  1.45it/s]

buffer size = 13786, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6694/2000001 [1:07:00<378:02:59,  1.46it/s]

buffer size = 13788, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6695/2000001 [1:07:00<374:10:23,  1.48it/s]

buffer size = 13790, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6696/2000001 [1:07:01<365:41:04,  1.51it/s]

buffer size = 13792, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6697/2000001 [1:07:02<362:18:58,  1.53it/s]

buffer size = 13794, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6698/2000001 [1:07:02<368:38:03,  1.50it/s]

buffer size = 13796, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6699/2000001 [1:07:03<364:08:45,  1.52it/s]

buffer size = 13798, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6700/2000001 [1:07:04<364:14:31,  1.52it/s]

buffer size = 13800, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6701/2000001 [1:07:04<361:57:42,  1.53it/s]

buffer size = 13802, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6702/2000001 [1:07:05<366:07:55,  1.51it/s]

buffer size = 13804, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6703/2000001 [1:07:06<383:38:28,  1.44it/s]

buffer size = 13806, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6704/2000001 [1:07:07<406:04:26,  1.36it/s]

buffer size = 13808, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6705/2000001 [1:07:07<414:13:04,  1.34it/s]

buffer size = 13810, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6706/2000001 [1:07:08<430:24:30,  1.29it/s]

buffer size = 13812, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6707/2000001 [1:07:09<458:20:47,  1.21it/s]

buffer size = 13814, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6708/2000001 [1:07:10<432:36:24,  1.28it/s]

buffer size = 13816, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6709/2000001 [1:07:10<410:59:11,  1.35it/s]

buffer size = 13818, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6710/2000001 [1:07:11<396:58:51,  1.39it/s]

buffer size = 13820, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6711/2000001 [1:07:12<387:13:44,  1.43it/s]

buffer size = 13822, epsilon = 0.09665
mean_reward :  0.0


  0%|          | 6712/2000001 [1:07:12<382:42:14,  1.45it/s]

buffer size = 13824, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6713/2000001 [1:07:13<375:55:01,  1.47it/s]

buffer size = 13826, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6714/2000001 [1:07:14<373:53:57,  1.48it/s]

buffer size = 13828, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6715/2000001 [1:07:14<368:30:24,  1.50it/s]

buffer size = 13830, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6716/2000001 [1:07:15<368:35:26,  1.50it/s]

buffer size = 13832, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6717/2000001 [1:07:16<363:23:08,  1.52it/s]

buffer size = 13834, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6718/2000001 [1:07:16<365:14:39,  1.52it/s]

buffer size = 13836, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6719/2000001 [1:07:17<359:19:19,  1.54it/s]

buffer size = 13838, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6720/2000001 [1:07:18<361:30:11,  1.53it/s]

buffer size = 13840, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6721/2000001 [1:07:18<365:09:13,  1.52it/s]

buffer size = 13842, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6722/2000001 [1:07:19<361:47:13,  1.53it/s]

buffer size = 13844, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6723/2000001 [1:07:20<385:19:47,  1.44it/s]

buffer size = 13846, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6724/2000001 [1:07:21<421:01:17,  1.32it/s]

buffer size = 13848, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6725/2000001 [1:07:22<451:59:34,  1.22it/s]

buffer size = 13850, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6726/2000001 [1:07:23<470:14:30,  1.18it/s]

buffer size = 13852, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6727/2000001 [1:07:23<438:28:56,  1.26it/s]

buffer size = 13854, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6728/2000001 [1:07:24<414:15:20,  1.34it/s]

buffer size = 13856, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6729/2000001 [1:07:25<399:28:52,  1.39it/s]

buffer size = 13858, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6730/2000001 [1:07:25<391:05:03,  1.42it/s]

buffer size = 13860, epsilon = 0.09664
mean_reward :  0.0


  0%|          | 6731/2000001 [1:07:26<379:36:27,  1.46it/s]

buffer size = 13862, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6732/2000001 [1:07:26<370:15:22,  1.50it/s]

buffer size = 13864, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6733/2000001 [1:07:27<368:27:25,  1.50it/s]

buffer size = 13866, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6734/2000001 [1:07:28<367:19:38,  1.51it/s]

buffer size = 13868, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6735/2000001 [1:07:28<365:24:22,  1.52it/s]

buffer size = 13870, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6736/2000001 [1:07:29<363:30:51,  1.52it/s]

buffer size = 13872, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6737/2000001 [1:07:30<358:11:15,  1.55it/s]

buffer size = 13874, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6738/2000001 [1:07:30<360:29:50,  1.54it/s]

buffer size = 13876, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6739/2000001 [1:07:31<362:22:15,  1.53it/s]

buffer size = 13878, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6740/2000001 [1:07:32<359:30:26,  1.54it/s]

buffer size = 13880, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6741/2000001 [1:07:32<361:56:26,  1.53it/s]

buffer size = 13882, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6742/2000001 [1:07:33<384:27:37,  1.44it/s]

buffer size = 13884, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6743/2000001 [1:07:34<399:18:29,  1.39it/s]

buffer size = 13886, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6744/2000001 [1:07:35<412:46:27,  1.34it/s]

buffer size = 13888, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6745/2000001 [1:07:35<424:50:16,  1.30it/s]

buffer size = 13890, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6746/2000001 [1:07:36<441:08:15,  1.26it/s]

buffer size = 13892, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6747/2000001 [1:07:37<432:41:44,  1.28it/s]

buffer size = 13894, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6748/2000001 [1:07:38<422:52:09,  1.31it/s]

buffer size = 13896, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6749/2000001 [1:07:38<402:36:58,  1.38it/s]

buffer size = 13898, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6750/2000001 [1:07:39<391:35:34,  1.41it/s]

buffer size = 13900, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6751/2000001 [1:07:40<381:00:46,  1.45it/s]

buffer size = 13902, epsilon = 0.09663
mean_reward :  0.0


  0%|          | 6752/2000001 [1:07:40<376:03:05,  1.47it/s]

buffer size = 13904, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6753/2000001 [1:07:41<372:30:53,  1.49it/s]

buffer size = 13906, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6754/2000001 [1:07:42<368:32:21,  1.50it/s]

buffer size = 13908, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6755/2000001 [1:07:42<364:41:47,  1.52it/s]

buffer size = 13910, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6756/2000001 [1:07:43<362:17:27,  1.53it/s]

buffer size = 13912, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6757/2000001 [1:07:44<360:47:41,  1.53it/s]

buffer size = 13914, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6758/2000001 [1:07:44<357:47:43,  1.55it/s]

buffer size = 13916, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6759/2000001 [1:07:45<357:57:10,  1.55it/s]

buffer size = 13918, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6760/2000001 [1:07:46<359:48:57,  1.54it/s]

buffer size = 13920, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6761/2000001 [1:07:46<356:13:26,  1.55it/s]

buffer size = 13922, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6762/2000001 [1:07:47<383:06:16,  1.45it/s]

buffer size = 13924, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6763/2000001 [1:07:48<393:07:39,  1.41it/s]

buffer size = 13926, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6764/2000001 [1:07:49<404:40:24,  1.37it/s]

buffer size = 13928, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6765/2000001 [1:07:49<424:36:18,  1.30it/s]

buffer size = 13930, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6766/2000001 [1:07:50<443:25:06,  1.25it/s]

buffer size = 13932, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6767/2000001 [1:07:51<433:29:07,  1.28it/s]

buffer size = 13934, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6768/2000001 [1:07:52<413:28:12,  1.34it/s]

buffer size = 13936, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6769/2000001 [1:07:52<399:46:16,  1.38it/s]

buffer size = 13938, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6770/2000001 [1:07:53<387:52:39,  1.43it/s]

buffer size = 13940, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6771/2000001 [1:07:54<380:38:19,  1.45it/s]

buffer size = 13942, epsilon = 0.09662
mean_reward :  0.0


  0%|          | 6772/2000001 [1:07:54<375:52:11,  1.47it/s]

buffer size = 13944, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6773/2000001 [1:07:55<369:18:21,  1.50it/s]

buffer size = 13946, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6774/2000001 [1:07:56<369:04:37,  1.50it/s]

buffer size = 13948, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6775/2000001 [1:07:56<362:16:27,  1.53it/s]

buffer size = 13950, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6776/2000001 [1:07:57<361:26:19,  1.53it/s]

buffer size = 13952, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6777/2000001 [1:07:58<362:41:23,  1.53it/s]

buffer size = 13954, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6778/2000001 [1:07:58<366:43:32,  1.51it/s]

buffer size = 13956, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6779/2000001 [1:07:59<366:19:42,  1.51it/s]

buffer size = 13958, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6780/2000001 [1:08:00<365:26:15,  1.52it/s]

buffer size = 13960, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6781/2000001 [1:08:00<366:05:48,  1.51it/s]

buffer size = 13962, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6782/2000001 [1:08:01<395:41:06,  1.40it/s]

buffer size = 13964, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6783/2000001 [1:08:02<418:29:30,  1.32it/s]

buffer size = 13966, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6784/2000001 [1:08:03<429:48:44,  1.29it/s]

buffer size = 13968, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6785/2000001 [1:08:04<444:51:13,  1.24it/s]

buffer size = 13970, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6786/2000001 [1:08:04<449:29:39,  1.23it/s]

buffer size = 13972, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6787/2000001 [1:08:05<438:57:29,  1.26it/s]

buffer size = 13974, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6788/2000001 [1:08:06<417:26:17,  1.33it/s]

buffer size = 13976, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6789/2000001 [1:08:07<403:36:28,  1.37it/s]

buffer size = 13978, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6790/2000001 [1:08:07<394:56:57,  1.40it/s]

buffer size = 13980, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6791/2000001 [1:08:08<382:43:29,  1.45it/s]

buffer size = 13982, epsilon = 0.09661
mean_reward :  0.0


  0%|          | 6792/2000001 [1:08:09<375:55:10,  1.47it/s]

buffer size = 13984, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6793/2000001 [1:08:09<376:18:35,  1.47it/s]

buffer size = 13986, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6794/2000001 [1:08:10<372:38:22,  1.49it/s]

buffer size = 13988, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6795/2000001 [1:08:11<372:45:36,  1.49it/s]

buffer size = 13990, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6796/2000001 [1:08:11<373:29:43,  1.48it/s]

buffer size = 13992, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6797/2000001 [1:08:12<367:17:00,  1.51it/s]

buffer size = 13994, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6798/2000001 [1:08:12<367:55:12,  1.50it/s]

buffer size = 13996, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6799/2000001 [1:08:13<364:17:27,  1.52it/s]

buffer size = 13998, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6800/2000001 [1:08:14<366:12:43,  1.51it/s]

buffer size = 14000, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6801/2000001 [1:08:15<387:50:21,  1.43it/s]

buffer size = 14002, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6802/2000001 [1:08:15<414:41:48,  1.34it/s]

buffer size = 14004, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6803/2000001 [1:08:16<441:09:29,  1.26it/s]

buffer size = 14006, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6804/2000001 [1:08:17<465:04:19,  1.19it/s]

buffer size = 14008, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6805/2000001 [1:08:18<447:35:41,  1.24it/s]

buffer size = 14010, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6806/2000001 [1:08:19<421:24:51,  1.31it/s]

buffer size = 14012, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6807/2000001 [1:08:19<409:35:00,  1.35it/s]

buffer size = 14014, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6808/2000001 [1:08:20<394:49:37,  1.40it/s]

buffer size = 14016, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6809/2000001 [1:08:21<389:13:53,  1.42it/s]

buffer size = 14018, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6810/2000001 [1:08:21<381:41:15,  1.45it/s]

buffer size = 14020, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6811/2000001 [1:08:22<379:17:42,  1.46it/s]

buffer size = 14022, epsilon = 0.09660
mean_reward :  0.0


  0%|          | 6812/2000001 [1:08:23<371:45:17,  1.49it/s]

buffer size = 14024, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6813/2000001 [1:08:23<369:20:32,  1.50it/s]

buffer size = 14026, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6814/2000001 [1:08:24<361:42:00,  1.53it/s]

buffer size = 14028, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6815/2000001 [1:08:25<364:58:49,  1.52it/s]

buffer size = 14030, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6816/2000001 [1:08:25<361:46:41,  1.53it/s]

buffer size = 14032, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6817/2000001 [1:08:26<365:07:06,  1.52it/s]

buffer size = 14034, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6818/2000001 [1:08:27<360:50:48,  1.53it/s]

buffer size = 14036, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6819/2000001 [1:08:27<362:20:03,  1.53it/s]

buffer size = 14038, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6820/2000001 [1:08:28<396:58:04,  1.39it/s]

buffer size = 14040, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6821/2000001 [1:08:29<433:32:12,  1.28it/s]

buffer size = 14042, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6822/2000001 [1:08:30<451:00:51,  1.23it/s]

buffer size = 14044, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6823/2000001 [1:08:31<461:51:20,  1.20it/s]

buffer size = 14046, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6824/2000001 [1:08:31<434:48:35,  1.27it/s]

buffer size = 14048, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6825/2000001 [1:08:32<414:45:05,  1.33it/s]

buffer size = 14050, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6826/2000001 [1:08:33<397:54:16,  1.39it/s]

buffer size = 14052, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6827/2000001 [1:08:33<388:52:17,  1.42it/s]

buffer size = 14054, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6828/2000001 [1:08:34<380:09:12,  1.46it/s]

buffer size = 14056, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6829/2000001 [1:08:35<378:50:23,  1.46it/s]

buffer size = 14058, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6830/2000001 [1:08:35<368:34:50,  1.50it/s]

buffer size = 14060, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6831/2000001 [1:08:36<368:23:29,  1.50it/s]

buffer size = 14062, epsilon = 0.09659
mean_reward :  0.0


  0%|          | 6832/2000001 [1:08:37<367:06:57,  1.51it/s]

buffer size = 14064, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6833/2000001 [1:08:37<365:30:06,  1.51it/s]

buffer size = 14066, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6834/2000001 [1:08:38<363:45:44,  1.52it/s]

buffer size = 14068, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6835/2000001 [1:08:39<368:06:15,  1.50it/s]

buffer size = 14070, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6836/2000001 [1:08:39<364:07:21,  1.52it/s]

buffer size = 14072, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6837/2000001 [1:08:40<365:26:18,  1.52it/s]

buffer size = 14074, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6838/2000001 [1:08:41<359:14:54,  1.54it/s]

buffer size = 14076, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6839/2000001 [1:08:41<384:00:36,  1.44it/s]

buffer size = 14078, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6840/2000001 [1:08:42<399:49:53,  1.38it/s]

buffer size = 14080, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6841/2000001 [1:08:43<415:55:37,  1.33it/s]

buffer size = 14082, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6842/2000001 [1:08:44<436:27:33,  1.27it/s]

buffer size = 14084, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6843/2000001 [1:08:45<443:26:33,  1.25it/s]

buffer size = 14086, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6844/2000001 [1:08:46<435:06:20,  1.27it/s]

buffer size = 14088, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6845/2000001 [1:08:46<421:22:12,  1.31it/s]

buffer size = 14090, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6846/2000001 [1:08:47<400:46:26,  1.38it/s]

buffer size = 14092, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6847/2000001 [1:08:48<389:52:37,  1.42it/s]

buffer size = 14094, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6848/2000001 [1:08:48<379:24:39,  1.46it/s]

buffer size = 14096, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6849/2000001 [1:08:49<377:20:36,  1.47it/s]

buffer size = 14098, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6850/2000001 [1:08:50<378:22:25,  1.46it/s]

buffer size = 14100, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6851/2000001 [1:08:50<370:30:29,  1.49it/s]

buffer size = 14102, epsilon = 0.09658
mean_reward :  0.0


  0%|          | 6852/2000001 [1:08:51<367:49:26,  1.51it/s]

buffer size = 14104, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6853/2000001 [1:08:51<366:02:41,  1.51it/s]

buffer size = 14106, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6854/2000001 [1:08:52<364:59:04,  1.52it/s]

buffer size = 14108, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6855/2000001 [1:08:53<362:23:26,  1.53it/s]

buffer size = 14110, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6856/2000001 [1:08:53<362:32:14,  1.53it/s]

buffer size = 14112, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6857/2000001 [1:08:54<364:50:49,  1.52it/s]

buffer size = 14114, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6858/2000001 [1:08:55<368:58:42,  1.50it/s]

buffer size = 14116, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6859/2000001 [1:08:56<416:16:30,  1.33it/s]

buffer size = 14118, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6860/2000001 [1:08:57<441:14:40,  1.25it/s]

buffer size = 14120, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6861/2000001 [1:08:58<465:09:00,  1.19it/s]

buffer size = 14122, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6862/2000001 [1:08:58<453:42:18,  1.22it/s]

buffer size = 14124, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6863/2000001 [1:08:59<430:15:45,  1.29it/s]

buffer size = 14126, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6864/2000001 [1:09:00<405:42:01,  1.36it/s]

buffer size = 14128, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6865/2000001 [1:09:00<391:16:08,  1.42it/s]

buffer size = 14130, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6866/2000001 [1:09:01<385:10:20,  1.44it/s]

buffer size = 14132, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6867/2000001 [1:09:02<378:58:35,  1.46it/s]

buffer size = 14134, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6868/2000001 [1:09:02<375:03:37,  1.48it/s]

buffer size = 14136, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6869/2000001 [1:09:03<368:23:09,  1.50it/s]

buffer size = 14138, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6870/2000001 [1:09:04<367:52:27,  1.50it/s]

buffer size = 14140, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6871/2000001 [1:09:04<370:16:30,  1.50it/s]

buffer size = 14142, epsilon = 0.09657
mean_reward :  0.0


  0%|          | 6872/2000001 [1:09:05<367:19:36,  1.51it/s]

buffer size = 14144, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6873/2000001 [1:09:06<368:48:50,  1.50it/s]

buffer size = 14146, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6874/2000001 [1:09:06<369:54:16,  1.50it/s]

buffer size = 14148, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6875/2000001 [1:09:07<368:35:45,  1.50it/s]

buffer size = 14150, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6876/2000001 [1:09:08<370:40:21,  1.49it/s]

buffer size = 14152, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6877/2000001 [1:09:08<384:46:18,  1.44it/s]

buffer size = 14154, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6878/2000001 [1:09:09<425:28:52,  1.30it/s]

buffer size = 14156, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6879/2000001 [1:09:10<455:59:53,  1.21it/s]

buffer size = 14158, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6880/2000001 [1:09:11<463:48:18,  1.19it/s]

buffer size = 14160, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6881/2000001 [1:09:12<436:20:44,  1.27it/s]

buffer size = 14162, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6882/2000001 [1:09:12<415:03:03,  1.33it/s]

buffer size = 14164, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6883/2000001 [1:09:13<401:53:30,  1.38it/s]

buffer size = 14166, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6884/2000001 [1:09:14<391:01:46,  1.42it/s]

buffer size = 14168, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6885/2000001 [1:09:14<379:10:13,  1.46it/s]

buffer size = 14170, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6886/2000001 [1:09:15<381:34:43,  1.45it/s]

buffer size = 14172, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6887/2000001 [1:09:16<376:11:44,  1.47it/s]

buffer size = 14174, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6888/2000001 [1:09:16<366:49:29,  1.51it/s]

buffer size = 14176, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6889/2000001 [1:09:17<368:24:23,  1.50it/s]

buffer size = 14178, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6890/2000001 [1:09:18<362:50:22,  1.53it/s]

buffer size = 14180, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6891/2000001 [1:09:18<364:47:43,  1.52it/s]

buffer size = 14182, epsilon = 0.09656
mean_reward :  0.0


  0%|          | 6892/2000001 [1:09:19<364:15:18,  1.52it/s]

buffer size = 14184, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6893/2000001 [1:09:20<364:55:50,  1.52it/s]

buffer size = 14186, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6894/2000001 [1:09:20<364:08:33,  1.52it/s]

buffer size = 14188, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6895/2000001 [1:09:21<360:47:14,  1.53it/s]

buffer size = 14190, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6896/2000001 [1:09:22<393:29:28,  1.41it/s]

buffer size = 14192, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6897/2000001 [1:09:23<430:46:49,  1.29it/s]

buffer size = 14194, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6898/2000001 [1:09:24<446:28:50,  1.24it/s]

buffer size = 14196, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6899/2000001 [1:09:24<458:21:43,  1.21it/s]

buffer size = 14198, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6900/2000001 [1:09:25<443:31:03,  1.25it/s]

buffer size = 14200, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6901/2000001 [1:09:26<419:56:26,  1.32it/s]

buffer size = 14202, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6902/2000001 [1:09:27<405:07:56,  1.37it/s]

buffer size = 14204, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6903/2000001 [1:09:27<403:51:52,  1.37it/s]

buffer size = 14206, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6904/2000001 [1:09:28<394:39:15,  1.40it/s]

buffer size = 14208, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6905/2000001 [1:09:29<392:17:52,  1.41it/s]

buffer size = 14210, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6906/2000001 [1:09:29<380:58:34,  1.45it/s]

buffer size = 14212, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6907/2000001 [1:09:30<380:11:40,  1.46it/s]

buffer size = 14214, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6908/2000001 [1:09:31<379:08:55,  1.46it/s]

buffer size = 14216, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6909/2000001 [1:09:31<372:08:28,  1.49it/s]

buffer size = 14218, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6910/2000001 [1:09:32<373:24:25,  1.48it/s]

buffer size = 14220, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6911/2000001 [1:09:33<366:13:43,  1.51it/s]

buffer size = 14222, epsilon = 0.09655
mean_reward :  0.0


  0%|          | 6912/2000001 [1:09:33<367:40:48,  1.51it/s]

buffer size = 14224, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6913/2000001 [1:09:34<365:43:50,  1.51it/s]

buffer size = 14226, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6914/2000001 [1:09:35<385:21:55,  1.44it/s]

buffer size = 14228, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6915/2000001 [1:09:36<410:56:06,  1.35it/s]

buffer size = 14230, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6916/2000001 [1:09:36<432:44:52,  1.28it/s]

buffer size = 14232, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6917/2000001 [1:09:37<454:59:35,  1.22it/s]

buffer size = 14234, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6918/2000001 [1:09:38<465:40:21,  1.19it/s]

buffer size = 14236, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6919/2000001 [1:09:39<434:06:22,  1.28it/s]

buffer size = 14238, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6920/2000001 [1:09:40<419:24:58,  1.32it/s]

buffer size = 14240, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6921/2000001 [1:09:40<404:19:55,  1.37it/s]

buffer size = 14242, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6922/2000001 [1:09:41<389:47:00,  1.42it/s]

buffer size = 14244, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6923/2000001 [1:09:42<383:04:37,  1.45it/s]

buffer size = 14246, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6924/2000001 [1:09:42<376:59:50,  1.47it/s]

buffer size = 14248, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6925/2000001 [1:09:43<373:46:10,  1.48it/s]

buffer size = 14250, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6926/2000001 [1:09:44<374:40:15,  1.48it/s]

buffer size = 14252, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6927/2000001 [1:09:44<373:08:38,  1.48it/s]

buffer size = 14254, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6928/2000001 [1:09:45<368:26:31,  1.50it/s]

buffer size = 14256, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6929/2000001 [1:09:46<371:27:38,  1.49it/s]

buffer size = 14258, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6930/2000001 [1:09:46<364:48:20,  1.52it/s]

buffer size = 14260, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6931/2000001 [1:09:47<368:23:59,  1.50it/s]

buffer size = 14262, epsilon = 0.09654
mean_reward :  0.0


  0%|          | 6932/2000001 [1:09:48<367:18:01,  1.51it/s]

buffer size = 14264, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6933/2000001 [1:09:48<389:26:54,  1.42it/s]

buffer size = 14266, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6934/2000001 [1:09:49<413:44:17,  1.34it/s]

buffer size = 14268, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6935/2000001 [1:09:50<429:01:55,  1.29it/s]

buffer size = 14270, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6936/2000001 [1:09:51<443:33:21,  1.25it/s]

buffer size = 14272, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6937/2000001 [1:09:52<454:02:00,  1.22it/s]

buffer size = 14274, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6938/2000001 [1:09:52<441:57:00,  1.25it/s]

buffer size = 14276, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6939/2000001 [1:09:53<422:29:45,  1.31it/s]

buffer size = 14278, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6940/2000001 [1:09:54<404:31:50,  1.37it/s]

buffer size = 14280, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6941/2000001 [1:09:55<395:55:30,  1.40it/s]

buffer size = 14282, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6942/2000001 [1:09:55<387:36:55,  1.43it/s]

buffer size = 14284, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6943/2000001 [1:09:56<383:08:27,  1.44it/s]

buffer size = 14286, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6944/2000001 [1:09:57<381:31:26,  1.45it/s]

buffer size = 14288, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6945/2000001 [1:09:57<377:46:27,  1.47it/s]

buffer size = 14290, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6946/2000001 [1:09:58<373:24:40,  1.48it/s]

buffer size = 14292, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6947/2000001 [1:09:59<373:33:33,  1.48it/s]

buffer size = 14294, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6948/2000001 [1:09:59<369:54:33,  1.50it/s]

buffer size = 14296, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6949/2000001 [1:10:00<372:21:25,  1.49it/s]

buffer size = 14298, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6950/2000001 [1:10:01<368:39:11,  1.50it/s]

buffer size = 14300, epsilon = 0.09653
mean_reward :  0.0


  0%|          | 6951/2000001 [1:10:01<368:27:03,  1.50it/s]

buffer size = 14302, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6952/2000001 [1:10:02<390:54:26,  1.42it/s]

buffer size = 14304, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6953/2000001 [1:10:03<406:41:23,  1.36it/s]

buffer size = 14306, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6954/2000001 [1:10:04<413:36:10,  1.34it/s]

buffer size = 14308, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6955/2000001 [1:10:04<426:16:38,  1.30it/s]

buffer size = 14310, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6956/2000001 [1:10:05<446:43:33,  1.24it/s]

buffer size = 14312, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6957/2000001 [1:10:06<449:58:27,  1.23it/s]

buffer size = 14314, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6958/2000001 [1:10:07<440:05:46,  1.26it/s]

buffer size = 14316, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6959/2000001 [1:10:08<418:32:36,  1.32it/s]

buffer size = 14318, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6960/2000001 [1:10:08<404:01:58,  1.37it/s]

buffer size = 14320, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6961/2000001 [1:10:09<393:21:48,  1.41it/s]

buffer size = 14322, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6962/2000001 [1:10:09<382:19:29,  1.45it/s]

buffer size = 14324, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6963/2000001 [1:10:10<382:03:02,  1.45it/s]

buffer size = 14326, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6964/2000001 [1:10:11<373:08:59,  1.48it/s]

buffer size = 14328, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6965/2000001 [1:10:12<375:11:38,  1.48it/s]

buffer size = 14330, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6966/2000001 [1:10:12<373:47:27,  1.48it/s]

buffer size = 14332, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6967/2000001 [1:10:13<367:35:42,  1.51it/s]

buffer size = 14334, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6968/2000001 [1:10:13<366:13:10,  1.51it/s]

buffer size = 14336, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6969/2000001 [1:10:14<367:52:00,  1.50it/s]

buffer size = 14338, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6970/2000001 [1:10:15<365:20:17,  1.52it/s]

buffer size = 14340, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6971/2000001 [1:10:15<366:17:32,  1.51it/s]

buffer size = 14342, epsilon = 0.09652
mean_reward :  0.0


  0%|          | 6972/2000001 [1:10:16<399:18:50,  1.39it/s]

buffer size = 14344, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6973/2000001 [1:10:17<438:14:24,  1.26it/s]

buffer size = 14346, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6974/2000001 [1:10:18<444:25:36,  1.25it/s]

buffer size = 14348, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6975/2000001 [1:10:19<465:16:48,  1.19it/s]

buffer size = 14350, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6976/2000001 [1:10:20<448:15:17,  1.24it/s]

buffer size = 14352, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6977/2000001 [1:10:20<423:26:14,  1.31it/s]

buffer size = 14354, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6978/2000001 [1:10:21<406:38:37,  1.36it/s]

buffer size = 14356, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6979/2000001 [1:10:22<396:41:06,  1.40it/s]

buffer size = 14358, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6980/2000001 [1:10:22<389:21:43,  1.42it/s]

buffer size = 14360, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6981/2000001 [1:10:23<382:53:20,  1.45it/s]

buffer size = 14362, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6982/2000001 [1:10:24<379:57:26,  1.46it/s]

buffer size = 14364, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6983/2000001 [1:10:24<376:25:28,  1.47it/s]

buffer size = 14366, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6984/2000001 [1:10:25<372:53:58,  1.48it/s]

buffer size = 14368, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6985/2000001 [1:10:26<374:27:11,  1.48it/s]

buffer size = 14370, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6986/2000001 [1:10:26<373:29:41,  1.48it/s]

buffer size = 14372, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6987/2000001 [1:10:27<370:28:52,  1.49it/s]

buffer size = 14374, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6988/2000001 [1:10:28<366:09:28,  1.51it/s]

buffer size = 14376, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6989/2000001 [1:10:28<368:02:17,  1.50it/s]

buffer size = 14378, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6990/2000001 [1:10:29<370:28:05,  1.49it/s]

buffer size = 14380, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6991/2000001 [1:10:30<396:34:58,  1.40it/s]

buffer size = 14382, epsilon = 0.09651
mean_reward :  0.0


  0%|          | 6992/2000001 [1:10:31<409:40:16,  1.35it/s]

buffer size = 14384, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 6993/2000001 [1:10:32<417:07:09,  1.33it/s]

buffer size = 14386, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 6994/2000001 [1:10:32<441:12:32,  1.25it/s]

buffer size = 14388, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 6995/2000001 [1:10:33<449:56:02,  1.23it/s]

buffer size = 14390, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 6996/2000001 [1:10:34<439:36:23,  1.26it/s]

buffer size = 14392, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 6997/2000001 [1:10:35<418:41:17,  1.32it/s]

buffer size = 14394, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 6998/2000001 [1:10:35<401:22:56,  1.38it/s]

buffer size = 14396, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 6999/2000001 [1:10:36<391:33:19,  1.41it/s]

buffer size = 14398, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7000/2000001 [1:10:37<389:09:12,  1.42it/s]

buffer size = 14400, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7001/2000001 [1:10:37<378:54:22,  1.46it/s]

buffer size = 14402, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7002/2000001 [1:10:38<377:24:13,  1.47it/s]

buffer size = 14404, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7003/2000001 [1:10:39<374:47:17,  1.48it/s]

buffer size = 14406, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7004/2000001 [1:10:39<373:32:22,  1.48it/s]

buffer size = 14408, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7005/2000001 [1:10:40<369:15:11,  1.50it/s]

buffer size = 14410, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7006/2000001 [1:10:41<368:11:36,  1.50it/s]

buffer size = 14412, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7007/2000001 [1:10:41<366:06:46,  1.51it/s]

buffer size = 14414, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7008/2000001 [1:10:42<370:04:38,  1.50it/s]

buffer size = 14416, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7009/2000001 [1:10:43<368:32:30,  1.50it/s]

buffer size = 14418, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7010/2000001 [1:10:43<393:13:44,  1.41it/s]

buffer size = 14420, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7011/2000001 [1:10:44<410:04:14,  1.35it/s]

buffer size = 14422, epsilon = 0.09650
mean_reward :  0.0


  0%|          | 7012/2000001 [1:10:45<417:04:26,  1.33it/s]

buffer size = 14424, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7013/2000001 [1:10:46<428:38:34,  1.29it/s]

buffer size = 14426, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7014/2000001 [1:10:47<447:06:23,  1.24it/s]

buffer size = 14428, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7015/2000001 [1:10:48<443:03:40,  1.25it/s]

buffer size = 14430, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7016/2000001 [1:10:48<434:45:25,  1.27it/s]

buffer size = 14432, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7017/2000001 [1:10:49<408:51:27,  1.35it/s]

buffer size = 14434, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7018/2000001 [1:10:50<403:41:06,  1.37it/s]

buffer size = 14436, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7019/2000001 [1:10:50<390:48:48,  1.42it/s]

buffer size = 14438, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7020/2000001 [1:10:51<381:21:43,  1.45it/s]

buffer size = 14440, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7021/2000001 [1:10:52<382:32:59,  1.45it/s]

buffer size = 14442, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7022/2000001 [1:10:52<372:00:14,  1.49it/s]

buffer size = 14444, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7023/2000001 [1:10:53<370:27:12,  1.49it/s]

buffer size = 14446, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7024/2000001 [1:10:54<371:08:14,  1.49it/s]

buffer size = 14448, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7025/2000001 [1:10:54<372:58:32,  1.48it/s]

buffer size = 14450, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7026/2000001 [1:10:55<370:53:08,  1.49it/s]

buffer size = 14452, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7027/2000001 [1:10:56<371:47:14,  1.49it/s]

buffer size = 14454, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7028/2000001 [1:10:56<368:08:02,  1.50it/s]

buffer size = 14456, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7029/2000001 [1:10:57<384:38:35,  1.44it/s]

buffer size = 14458, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7030/2000001 [1:10:58<419:11:24,  1.32it/s]

buffer size = 14460, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7031/2000001 [1:10:59<439:12:21,  1.26it/s]

buffer size = 14462, epsilon = 0.09649
mean_reward :  0.0


  0%|          | 7032/2000001 [1:11:00<447:14:28,  1.24it/s]

buffer size = 14464, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7033/2000001 [1:11:01<456:17:27,  1.21it/s]

buffer size = 14466, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7034/2000001 [1:11:01<428:31:55,  1.29it/s]

buffer size = 14468, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7035/2000001 [1:11:02<412:21:10,  1.34it/s]

buffer size = 14470, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7036/2000001 [1:11:03<400:44:51,  1.38it/s]

buffer size = 14472, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7037/2000001 [1:11:03<390:41:53,  1.42it/s]

buffer size = 14474, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7038/2000001 [1:11:04<383:55:45,  1.44it/s]

buffer size = 14476, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7039/2000001 [1:11:05<382:58:26,  1.45it/s]

buffer size = 14478, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7040/2000001 [1:11:05<380:59:04,  1.45it/s]

buffer size = 14480, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7041/2000001 [1:11:06<373:03:57,  1.48it/s]

buffer size = 14482, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7042/2000001 [1:11:07<375:19:33,  1.47it/s]

buffer size = 14484, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7043/2000001 [1:11:07<377:07:20,  1.47it/s]

buffer size = 14486, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7044/2000001 [1:11:08<376:42:45,  1.47it/s]

buffer size = 14488, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7045/2000001 [1:11:09<373:22:48,  1.48it/s]

buffer size = 14490, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7046/2000001 [1:11:09<374:21:14,  1.48it/s]

buffer size = 14492, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7047/2000001 [1:11:10<371:46:12,  1.49it/s]

buffer size = 14494, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7048/2000001 [1:11:11<392:11:47,  1.41it/s]

buffer size = 14496, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7049/2000001 [1:11:12<408:58:58,  1.35it/s]

buffer size = 14498, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7050/2000001 [1:11:12<412:08:10,  1.34it/s]

buffer size = 14500, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7051/2000001 [1:11:13<426:52:54,  1.30it/s]

buffer size = 14502, epsilon = 0.09648
mean_reward :  0.0


  0%|          | 7052/2000001 [1:11:14<445:41:18,  1.24it/s]

buffer size = 14504, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7053/2000001 [1:11:15<437:38:35,  1.26it/s]

buffer size = 14506, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7054/2000001 [1:11:15<429:04:38,  1.29it/s]

buffer size = 14508, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7055/2000001 [1:11:16<419:02:46,  1.32it/s]

buffer size = 14510, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7056/2000001 [1:11:17<399:40:02,  1.39it/s]

buffer size = 14512, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7057/2000001 [1:11:17<383:35:12,  1.44it/s]

buffer size = 14514, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7058/2000001 [1:11:18<378:10:00,  1.46it/s]

buffer size = 14516, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7059/2000001 [1:11:19<373:19:01,  1.48it/s]

buffer size = 14518, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7060/2000001 [1:11:19<374:37:10,  1.48it/s]

buffer size = 14520, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7061/2000001 [1:11:20<378:13:11,  1.46it/s]

buffer size = 14522, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7062/2000001 [1:11:21<372:45:53,  1.49it/s]

buffer size = 14524, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7063/2000001 [1:11:22<376:48:47,  1.47it/s]

buffer size = 14526, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7064/2000001 [1:11:22<373:49:48,  1.48it/s]

buffer size = 14528, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7065/2000001 [1:11:23<372:41:00,  1.49it/s]

buffer size = 14530, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7066/2000001 [1:11:24<373:07:53,  1.48it/s]

buffer size = 14532, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7067/2000001 [1:11:24<375:31:59,  1.47it/s]

buffer size = 14534, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7068/2000001 [1:11:25<404:52:13,  1.37it/s]

buffer size = 14536, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7069/2000001 [1:11:26<416:04:59,  1.33it/s]

buffer size = 14538, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7070/2000001 [1:11:27<424:53:46,  1.30it/s]

buffer size = 14540, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7071/2000001 [1:11:28<463:08:50,  1.20it/s]

buffer size = 14542, epsilon = 0.09647
mean_reward :  0.0


  0%|          | 7072/2000001 [1:11:28<458:51:00,  1.21it/s]

buffer size = 14544, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7073/2000001 [1:11:29<450:26:14,  1.23it/s]

buffer size = 14546, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7074/2000001 [1:11:30<428:39:18,  1.29it/s]

buffer size = 14548, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7075/2000001 [1:11:31<408:37:13,  1.35it/s]

buffer size = 14550, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7076/2000001 [1:11:31<399:56:51,  1.38it/s]

buffer size = 14552, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7077/2000001 [1:11:32<391:51:18,  1.41it/s]

buffer size = 14554, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7078/2000001 [1:11:33<382:40:01,  1.45it/s]

buffer size = 14556, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7079/2000001 [1:11:33<380:57:11,  1.45it/s]

buffer size = 14558, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7080/2000001 [1:11:34<379:20:40,  1.46it/s]

buffer size = 14560, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7081/2000001 [1:11:35<377:50:04,  1.47it/s]

buffer size = 14562, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7082/2000001 [1:11:35<374:43:12,  1.48it/s]

buffer size = 14564, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7083/2000001 [1:11:36<370:17:45,  1.49it/s]

buffer size = 14566, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7084/2000001 [1:11:37<370:53:17,  1.49it/s]

buffer size = 14568, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7085/2000001 [1:11:37<371:21:01,  1.49it/s]

buffer size = 14570, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7086/2000001 [1:11:38<367:28:42,  1.51it/s]

buffer size = 14572, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7087/2000001 [1:11:39<391:11:49,  1.42it/s]

buffer size = 14574, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7088/2000001 [1:11:40<427:06:59,  1.30it/s]

buffer size = 14576, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7089/2000001 [1:11:41<443:11:19,  1.25it/s]

buffer size = 14578, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7090/2000001 [1:11:41<462:36:04,  1.20it/s]

buffer size = 14580, epsilon = 0.09646
mean_reward :  0.0


  0%|          | 7091/2000001 [1:11:42<448:44:38,  1.23it/s]

buffer size = 14582, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7092/2000001 [1:11:43<431:34:29,  1.28it/s]

buffer size = 14584, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7093/2000001 [1:11:44<419:12:51,  1.32it/s]

buffer size = 14586, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7094/2000001 [1:11:44<402:19:06,  1.38it/s]

buffer size = 14588, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7095/2000001 [1:11:45<392:19:16,  1.41it/s]

buffer size = 14590, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7096/2000001 [1:11:46<388:22:19,  1.43it/s]

buffer size = 14592, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7097/2000001 [1:11:46<378:49:57,  1.46it/s]

buffer size = 14594, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7098/2000001 [1:11:47<379:22:25,  1.46it/s]

buffer size = 14596, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7099/2000001 [1:11:48<380:33:38,  1.45it/s]

buffer size = 14598, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7100/2000001 [1:11:48<372:25:32,  1.49it/s]

buffer size = 14600, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7101/2000001 [1:11:49<370:10:42,  1.50it/s]

buffer size = 14602, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7102/2000001 [1:11:50<376:42:54,  1.47it/s]

buffer size = 14604, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7103/2000001 [1:11:50<370:09:39,  1.50it/s]

buffer size = 14606, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7104/2000001 [1:11:51<374:01:16,  1.48it/s]

buffer size = 14608, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7105/2000001 [1:11:52<387:42:13,  1.43it/s]

buffer size = 14610, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7106/2000001 [1:11:53<404:05:07,  1.37it/s]

buffer size = 14612, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7107/2000001 [1:11:53<414:36:04,  1.34it/s]

buffer size = 14614, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7108/2000001 [1:11:54<430:50:51,  1.28it/s]

buffer size = 14616, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7109/2000001 [1:11:55<443:01:39,  1.25it/s]

buffer size = 14618, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7110/2000001 [1:11:56<439:08:08,  1.26it/s]

buffer size = 14620, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7111/2000001 [1:11:57<433:16:52,  1.28it/s]

buffer size = 14622, epsilon = 0.09645
mean_reward :  0.0


  0%|          | 7112/2000001 [1:11:57<425:13:47,  1.30it/s]

buffer size = 14624, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7113/2000001 [1:11:58<407:56:11,  1.36it/s]

buffer size = 14626, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7114/2000001 [1:11:59<400:22:54,  1.38it/s]

buffer size = 14628, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7115/2000001 [1:11:59<394:44:47,  1.40it/s]

buffer size = 14630, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7116/2000001 [1:12:00<387:09:46,  1.43it/s]

buffer size = 14632, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7117/2000001 [1:12:01<379:47:33,  1.46it/s]

buffer size = 14634, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7118/2000001 [1:12:01<373:54:16,  1.48it/s]

buffer size = 14636, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7119/2000001 [1:12:02<370:02:03,  1.50it/s]

buffer size = 14638, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7120/2000001 [1:12:03<371:07:03,  1.49it/s]

buffer size = 14640, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7121/2000001 [1:12:03<367:53:51,  1.50it/s]

buffer size = 14642, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7122/2000001 [1:12:04<372:01:01,  1.49it/s]

buffer size = 14644, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7123/2000001 [1:12:05<372:51:26,  1.48it/s]

buffer size = 14646, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7124/2000001 [1:12:05<376:53:49,  1.47it/s]

buffer size = 14648, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7125/2000001 [1:12:06<409:31:16,  1.35it/s]

buffer size = 14650, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7126/2000001 [1:12:07<425:08:55,  1.30it/s]

buffer size = 14652, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7127/2000001 [1:12:08<433:59:51,  1.28it/s]

buffer size = 14654, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7128/2000001 [1:12:09<452:43:14,  1.22it/s]

buffer size = 14656, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7129/2000001 [1:12:10<448:48:08,  1.23it/s]

buffer size = 14658, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7130/2000001 [1:12:10<438:24:43,  1.26it/s]

buffer size = 14660, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7131/2000001 [1:12:11<414:18:53,  1.34it/s]

buffer size = 14662, epsilon = 0.09644
mean_reward :  0.0


  0%|          | 7132/2000001 [1:12:12<397:13:37,  1.39it/s]

buffer size = 14664, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7133/2000001 [1:12:12<392:01:23,  1.41it/s]

buffer size = 14666, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7134/2000001 [1:12:13<384:03:48,  1.44it/s]

buffer size = 14668, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7135/2000001 [1:12:14<376:20:44,  1.47it/s]

buffer size = 14670, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7136/2000001 [1:12:14<378:04:39,  1.46it/s]

buffer size = 14672, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7137/2000001 [1:12:15<371:42:47,  1.49it/s]

buffer size = 14674, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7138/2000001 [1:12:16<372:12:21,  1.49it/s]

buffer size = 14676, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7139/2000001 [1:12:16<369:45:32,  1.50it/s]

buffer size = 14678, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7140/2000001 [1:12:17<369:48:36,  1.50it/s]

buffer size = 14680, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7141/2000001 [1:12:18<369:17:59,  1.50it/s]

buffer size = 14682, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7142/2000001 [1:12:18<371:39:28,  1.49it/s]

buffer size = 14684, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7143/2000001 [1:12:19<368:45:05,  1.50it/s]

buffer size = 14686, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7144/2000001 [1:12:20<415:30:06,  1.33it/s]

buffer size = 14688, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7145/2000001 [1:12:21<437:12:29,  1.27it/s]

buffer size = 14690, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7146/2000001 [1:12:22<446:36:08,  1.24it/s]

buffer size = 14692, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7147/2000001 [1:12:23<455:36:15,  1.22it/s]

buffer size = 14694, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7148/2000001 [1:12:23<447:34:25,  1.24it/s]

buffer size = 14696, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7149/2000001 [1:12:24<429:00:33,  1.29it/s]

buffer size = 14698, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7150/2000001 [1:12:25<414:07:41,  1.34it/s]

buffer size = 14700, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7151/2000001 [1:12:25<401:11:57,  1.38it/s]

buffer size = 14702, epsilon = 0.09643
mean_reward :  0.0


  0%|          | 7152/2000001 [1:12:26<391:08:45,  1.42it/s]

buffer size = 14704, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7153/2000001 [1:12:27<381:04:20,  1.45it/s]

buffer size = 14706, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7154/2000001 [1:12:27<379:36:37,  1.46it/s]

buffer size = 14708, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7155/2000001 [1:12:28<376:12:56,  1.47it/s]

buffer size = 14710, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7156/2000001 [1:12:29<371:50:00,  1.49it/s]

buffer size = 14712, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7157/2000001 [1:12:29<372:22:43,  1.49it/s]

buffer size = 14714, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7158/2000001 [1:12:30<373:44:41,  1.48it/s]

buffer size = 14716, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7159/2000001 [1:12:31<369:09:14,  1.50it/s]

buffer size = 14718, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7160/2000001 [1:12:31<366:37:42,  1.51it/s]

buffer size = 14720, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7161/2000001 [1:12:32<366:14:03,  1.51it/s]

buffer size = 14722, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7162/2000001 [1:12:33<368:34:05,  1.50it/s]

buffer size = 14724, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7163/2000001 [1:12:33<391:48:00,  1.41it/s]

buffer size = 14726, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7164/2000001 [1:12:34<413:40:54,  1.34it/s]

buffer size = 14728, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7165/2000001 [1:12:35<426:58:46,  1.30it/s]

buffer size = 14730, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7166/2000001 [1:12:36<436:38:24,  1.27it/s]

buffer size = 14732, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7167/2000001 [1:12:37<446:34:18,  1.24it/s]

buffer size = 14734, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7168/2000001 [1:12:38<449:08:32,  1.23it/s]

buffer size = 14736, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7169/2000001 [1:12:38<441:51:25,  1.25it/s]

buffer size = 14738, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7170/2000001 [1:12:39<418:55:31,  1.32it/s]

buffer size = 14740, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7171/2000001 [1:12:40<407:15:50,  1.36it/s]

buffer size = 14742, epsilon = 0.09642
mean_reward :  0.0


  0%|          | 7172/2000001 [1:12:40<400:34:33,  1.38it/s]

buffer size = 14744, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7173/2000001 [1:12:41<390:10:17,  1.42it/s]

buffer size = 14746, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7174/2000001 [1:12:42<382:45:55,  1.45it/s]

buffer size = 14748, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7175/2000001 [1:12:42<380:11:41,  1.46it/s]

buffer size = 14750, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7176/2000001 [1:12:43<377:31:54,  1.47it/s]

buffer size = 14752, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7177/2000001 [1:12:44<376:37:42,  1.47it/s]

buffer size = 14754, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7178/2000001 [1:12:44<372:17:57,  1.49it/s]

buffer size = 14756, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7179/2000001 [1:12:45<377:21:16,  1.47it/s]

buffer size = 14758, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7180/2000001 [1:12:46<379:00:00,  1.46it/s]

buffer size = 14760, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7181/2000001 [1:12:46<372:22:33,  1.49it/s]

buffer size = 14762, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7182/2000001 [1:12:47<402:41:21,  1.37it/s]

buffer size = 14764, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7183/2000001 [1:12:48<409:18:39,  1.35it/s]

buffer size = 14766, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7184/2000001 [1:12:49<413:09:38,  1.34it/s]

buffer size = 14768, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7185/2000001 [1:12:50<432:41:44,  1.28it/s]

buffer size = 14770, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7186/2000001 [1:12:51<445:20:22,  1.24it/s]

buffer size = 14772, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7187/2000001 [1:12:51<437:24:55,  1.27it/s]

buffer size = 14774, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7188/2000001 [1:12:52<428:49:16,  1.29it/s]

buffer size = 14776, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7189/2000001 [1:12:53<426:11:34,  1.30it/s]

buffer size = 14778, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7190/2000001 [1:12:53<411:35:22,  1.34it/s]

buffer size = 14780, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7191/2000001 [1:12:54<398:51:58,  1.39it/s]

buffer size = 14782, epsilon = 0.09641
mean_reward :  0.0


  0%|          | 7192/2000001 [1:12:55<397:27:46,  1.39it/s]

buffer size = 14784, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7193/2000001 [1:12:56<389:02:47,  1.42it/s]

buffer size = 14786, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7194/2000001 [1:12:56<382:31:34,  1.45it/s]

buffer size = 14788, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7195/2000001 [1:12:57<379:08:21,  1.46it/s]

buffer size = 14790, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7196/2000001 [1:12:58<380:22:12,  1.46it/s]

buffer size = 14792, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7197/2000001 [1:12:58<377:09:53,  1.47it/s]

buffer size = 14794, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7198/2000001 [1:12:59<383:53:29,  1.44it/s]

buffer size = 14796, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7199/2000001 [1:13:00<379:20:39,  1.46it/s]

buffer size = 14798, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7200/2000001 [1:13:00<373:34:45,  1.48it/s]

buffer size = 14800, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7201/2000001 [1:13:01<388:13:13,  1.43it/s]

buffer size = 14802, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7202/2000001 [1:13:02<427:57:26,  1.29it/s]

buffer size = 14804, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7203/2000001 [1:13:03<448:21:44,  1.23it/s]

buffer size = 14806, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7204/2000001 [1:13:04<461:21:18,  1.20it/s]

buffer size = 14808, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7205/2000001 [1:13:05<459:04:09,  1.21it/s]

buffer size = 14810, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7206/2000001 [1:13:05<434:09:47,  1.27it/s]

buffer size = 14812, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7207/2000001 [1:13:06<415:16:43,  1.33it/s]

buffer size = 14814, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7208/2000001 [1:13:07<400:04:30,  1.38it/s]

buffer size = 14816, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7209/2000001 [1:13:07<396:41:37,  1.40it/s]

buffer size = 14818, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7210/2000001 [1:13:08<387:57:09,  1.43it/s]

buffer size = 14820, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7211/2000001 [1:13:09<379:07:44,  1.46it/s]

buffer size = 14822, epsilon = 0.09640
mean_reward :  0.0


  0%|          | 7212/2000001 [1:13:09<377:25:10,  1.47it/s]

buffer size = 14824, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7213/2000001 [1:13:10<378:40:57,  1.46it/s]

buffer size = 14826, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7214/2000001 [1:13:11<375:53:43,  1.47it/s]

buffer size = 14828, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7215/2000001 [1:13:11<371:43:35,  1.49it/s]

buffer size = 14830, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7216/2000001 [1:13:12<372:59:04,  1.48it/s]

buffer size = 14832, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7217/2000001 [1:13:13<373:25:36,  1.48it/s]

buffer size = 14834, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7218/2000001 [1:13:13<374:52:45,  1.48it/s]

buffer size = 14836, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7219/2000001 [1:13:14<369:33:28,  1.50it/s]

buffer size = 14838, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7220/2000001 [1:13:15<391:02:01,  1.42it/s]

buffer size = 14840, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7221/2000001 [1:13:16<431:14:46,  1.28it/s]

buffer size = 14842, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7222/2000001 [1:13:17<434:21:46,  1.27it/s]

buffer size = 14844, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7223/2000001 [1:13:17<451:39:03,  1.23it/s]

buffer size = 14846, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7224/2000001 [1:13:18<453:22:55,  1.22it/s]

buffer size = 14848, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7225/2000001 [1:13:19<427:13:55,  1.30it/s]

buffer size = 14850, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7226/2000001 [1:13:20<413:35:56,  1.34it/s]

buffer size = 14852, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7227/2000001 [1:13:20<399:55:52,  1.38it/s]

buffer size = 14854, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7228/2000001 [1:13:21<393:19:17,  1.41it/s]

buffer size = 14856, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7229/2000001 [1:13:22<383:18:20,  1.44it/s]

buffer size = 14858, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7230/2000001 [1:13:22<380:15:33,  1.46it/s]

buffer size = 14860, epsilon = 0.09639
mean_reward :  0.0


  0%|          | 7231/2000001 [1:13:23<377:14:04,  1.47it/s]

buffer size = 14862, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7232/2000001 [1:13:24<368:33:04,  1.50it/s]

buffer size = 14864, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7233/2000001 [1:13:24<367:56:14,  1.50it/s]

buffer size = 14866, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7234/2000001 [1:13:25<370:06:36,  1.50it/s]

buffer size = 14868, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7235/2000001 [1:13:26<367:00:39,  1.51it/s]

buffer size = 14870, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7236/2000001 [1:13:26<365:37:31,  1.51it/s]

buffer size = 14872, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7237/2000001 [1:13:27<363:19:29,  1.52it/s]

buffer size = 14874, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7238/2000001 [1:13:28<368:26:53,  1.50it/s]

buffer size = 14876, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7239/2000001 [1:13:28<385:52:15,  1.43it/s]

buffer size = 14878, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7240/2000001 [1:13:29<407:32:08,  1.36it/s]

buffer size = 14880, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7241/2000001 [1:13:30<421:40:05,  1.31it/s]

buffer size = 14882, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7242/2000001 [1:13:31<431:10:21,  1.28it/s]

buffer size = 14884, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7243/2000001 [1:13:32<445:21:30,  1.24it/s]

buffer size = 14886, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7244/2000001 [1:13:32<437:17:00,  1.27it/s]

buffer size = 14888, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7245/2000001 [1:13:33<427:39:24,  1.29it/s]

buffer size = 14890, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7246/2000001 [1:13:34<419:40:59,  1.32it/s]

buffer size = 14892, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7247/2000001 [1:13:35<408:42:21,  1.35it/s]

buffer size = 14894, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7248/2000001 [1:13:35<392:31:36,  1.41it/s]

buffer size = 14896, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7249/2000001 [1:13:36<388:29:05,  1.42it/s]

buffer size = 14898, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7250/2000001 [1:13:37<386:21:07,  1.43it/s]

buffer size = 14900, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7251/2000001 [1:13:37<379:57:01,  1.46it/s]

buffer size = 14902, epsilon = 0.09638
mean_reward :  0.0


  0%|          | 7252/2000001 [1:13:38<379:51:12,  1.46it/s]

buffer size = 14904, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7253/2000001 [1:13:39<373:26:17,  1.48it/s]

buffer size = 14906, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7254/2000001 [1:13:39<376:38:53,  1.47it/s]

buffer size = 14908, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7255/2000001 [1:13:40<374:21:00,  1.48it/s]

buffer size = 14910, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7256/2000001 [1:13:41<375:08:36,  1.48it/s]

buffer size = 14912, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7257/2000001 [1:13:41<373:16:45,  1.48it/s]

buffer size = 14914, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7258/2000001 [1:13:42<385:53:12,  1.43it/s]

buffer size = 14916, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7259/2000001 [1:13:43<404:02:26,  1.37it/s]

buffer size = 14918, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7260/2000001 [1:13:44<411:02:40,  1.35it/s]

buffer size = 14920, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7261/2000001 [1:13:44<420:55:44,  1.32it/s]

buffer size = 14922, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7262/2000001 [1:13:45<431:36:30,  1.28it/s]

buffer size = 14924, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7263/2000001 [1:13:46<437:39:47,  1.26it/s]

buffer size = 14926, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7264/2000001 [1:13:47<428:16:03,  1.29it/s]

buffer size = 14928, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7265/2000001 [1:13:48<424:11:36,  1.30it/s]

buffer size = 14930, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7266/2000001 [1:13:48<408:08:19,  1.36it/s]

buffer size = 14932, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7267/2000001 [1:13:49<394:49:38,  1.40it/s]

buffer size = 14934, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7268/2000001 [1:13:50<391:24:17,  1.41it/s]

buffer size = 14936, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7269/2000001 [1:13:50<386:14:54,  1.43it/s]

buffer size = 14938, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7270/2000001 [1:13:51<384:52:55,  1.44it/s]

buffer size = 14940, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7271/2000001 [1:13:52<385:53:38,  1.43it/s]

buffer size = 14942, epsilon = 0.09637
mean_reward :  0.0


  0%|          | 7272/2000001 [1:13:52<381:47:37,  1.45it/s]

buffer size = 14944, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7273/2000001 [1:13:53<380:00:06,  1.46it/s]

buffer size = 14946, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7274/2000001 [1:13:54<378:51:32,  1.46it/s]

buffer size = 14948, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7275/2000001 [1:13:54<376:13:49,  1.47it/s]

buffer size = 14950, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7276/2000001 [1:13:55<375:24:27,  1.47it/s]

buffer size = 14952, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7277/2000001 [1:13:56<375:10:34,  1.48it/s]

buffer size = 14954, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7278/2000001 [1:13:56<392:23:21,  1.41it/s]

buffer size = 14956, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7279/2000001 [1:13:57<410:26:22,  1.35it/s]

buffer size = 14958, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7280/2000001 [1:13:58<416:12:51,  1.33it/s]

buffer size = 14960, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7281/2000001 [1:13:59<428:46:32,  1.29it/s]

buffer size = 14962, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7282/2000001 [1:14:00<440:55:25,  1.26it/s]

buffer size = 14964, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7283/2000001 [1:14:00<435:25:39,  1.27it/s]

buffer size = 14966, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7284/2000001 [1:14:01<427:28:16,  1.29it/s]

buffer size = 14968, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7285/2000001 [1:14:02<420:13:23,  1.32it/s]

buffer size = 14970, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7286/2000001 [1:14:03<406:10:48,  1.36it/s]

buffer size = 14972, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7287/2000001 [1:14:03<394:24:06,  1.40it/s]

buffer size = 14974, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7288/2000001 [1:14:04<379:27:51,  1.46it/s]

buffer size = 14976, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7289/2000001 [1:14:05<374:56:22,  1.48it/s]

buffer size = 14978, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7290/2000001 [1:14:05<368:24:45,  1.50it/s]

buffer size = 14980, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7291/2000001 [1:14:06<368:25:02,  1.50it/s]

buffer size = 14982, epsilon = 0.09636
mean_reward :  0.0


  0%|          | 7292/2000001 [1:14:07<371:08:37,  1.49it/s]

buffer size = 14984, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7293/2000001 [1:14:07<367:04:18,  1.51it/s]

buffer size = 14986, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7294/2000001 [1:14:08<362:02:44,  1.53it/s]

buffer size = 14988, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7295/2000001 [1:14:08<359:24:58,  1.54it/s]

buffer size = 14990, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7296/2000001 [1:14:09<358:10:45,  1.55it/s]

buffer size = 14992, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7297/2000001 [1:14:10<356:19:19,  1.55it/s]

buffer size = 14994, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7298/2000001 [1:14:11<388:26:59,  1.42it/s]

buffer size = 14996, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7299/2000001 [1:14:11<411:02:18,  1.35it/s]

buffer size = 14998, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7300/2000001 [1:14:12<446:28:48,  1.24it/s]

buffer size = 15000, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7301/2000001 [1:14:13<461:38:25,  1.20it/s]

buffer size = 15002, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7302/2000001 [1:14:14<443:42:19,  1.25it/s]

buffer size = 15004, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7303/2000001 [1:14:15<418:07:47,  1.32it/s]

buffer size = 15006, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7304/2000001 [1:14:15<406:49:02,  1.36it/s]

buffer size = 15008, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7305/2000001 [1:14:16<397:51:34,  1.39it/s]

buffer size = 15010, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7306/2000001 [1:14:17<385:39:08,  1.44it/s]

buffer size = 15012, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7307/2000001 [1:14:17<384:14:42,  1.44it/s]

buffer size = 15014, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7308/2000001 [1:14:18<383:04:06,  1.44it/s]

buffer size = 15016, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7309/2000001 [1:14:19<376:18:30,  1.47it/s]

buffer size = 15018, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7310/2000001 [1:14:19<379:18:52,  1.46it/s]

buffer size = 15020, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7311/2000001 [1:14:20<370:03:58,  1.50it/s]

buffer size = 15022, epsilon = 0.09635
mean_reward :  0.0


  0%|          | 7312/2000001 [1:14:21<370:47:02,  1.49it/s]

buffer size = 15024, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7313/2000001 [1:14:21<369:36:41,  1.50it/s]

buffer size = 15026, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7314/2000001 [1:14:22<370:04:08,  1.50it/s]

buffer size = 15028, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7315/2000001 [1:14:23<370:12:10,  1.50it/s]

buffer size = 15030, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7316/2000001 [1:14:23<384:50:35,  1.44it/s]

buffer size = 15032, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7317/2000001 [1:14:24<400:28:44,  1.38it/s]

buffer size = 15034, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7318/2000001 [1:14:25<408:39:40,  1.35it/s]

buffer size = 15036, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7319/2000001 [1:14:26<421:54:41,  1.31it/s]

buffer size = 15038, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7320/2000001 [1:14:27<435:34:56,  1.27it/s]

buffer size = 15040, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7321/2000001 [1:14:27<429:26:25,  1.29it/s]

buffer size = 15042, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7322/2000001 [1:14:28<425:03:01,  1.30it/s]

buffer size = 15044, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7323/2000001 [1:14:29<421:31:00,  1.31it/s]

buffer size = 15046, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7324/2000001 [1:14:30<406:04:11,  1.36it/s]

buffer size = 15048, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7325/2000001 [1:14:30<394:20:24,  1.40it/s]

buffer size = 15050, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7326/2000001 [1:14:31<383:46:34,  1.44it/s]

buffer size = 15052, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7327/2000001 [1:14:32<378:14:01,  1.46it/s]

buffer size = 15054, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7328/2000001 [1:14:32<378:28:10,  1.46it/s]

buffer size = 15056, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7329/2000001 [1:14:33<373:24:30,  1.48it/s]

buffer size = 15058, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7330/2000001 [1:14:34<372:38:01,  1.49it/s]

buffer size = 15060, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7331/2000001 [1:14:34<371:12:09,  1.49it/s]

buffer size = 15062, epsilon = 0.09634
mean_reward :  0.0


  0%|          | 7332/2000001 [1:14:35<368:37:14,  1.50it/s]

buffer size = 15064, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7333/2000001 [1:14:36<369:48:34,  1.50it/s]

buffer size = 15066, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7334/2000001 [1:14:36<370:07:31,  1.50it/s]

buffer size = 15068, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7335/2000001 [1:14:37<374:34:25,  1.48it/s]

buffer size = 15070, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7336/2000001 [1:14:38<397:34:36,  1.39it/s]

buffer size = 15072, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7337/2000001 [1:14:39<415:55:42,  1.33it/s]

buffer size = 15074, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7338/2000001 [1:14:39<430:13:13,  1.29it/s]

buffer size = 15076, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7339/2000001 [1:14:40<442:50:25,  1.25it/s]

buffer size = 15078, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7340/2000001 [1:14:41<434:24:08,  1.27it/s]

buffer size = 15080, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7341/2000001 [1:14:42<428:19:29,  1.29it/s]

buffer size = 15082, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7342/2000001 [1:14:42<422:33:47,  1.31it/s]

buffer size = 15084, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7343/2000001 [1:14:43<401:06:19,  1.38it/s]

buffer size = 15086, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7344/2000001 [1:14:44<390:54:30,  1.42it/s]

buffer size = 15088, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7345/2000001 [1:14:44<380:51:59,  1.45it/s]

buffer size = 15090, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7346/2000001 [1:14:45<381:57:33,  1.45it/s]

buffer size = 15092, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7347/2000001 [1:14:46<376:34:43,  1.47it/s]

buffer size = 15094, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7348/2000001 [1:14:46<374:40:47,  1.48it/s]

buffer size = 15096, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7349/2000001 [1:14:47<373:13:20,  1.48it/s]

buffer size = 15098, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7350/2000001 [1:14:48<368:50:37,  1.50it/s]

buffer size = 15100, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7351/2000001 [1:14:48<365:37:17,  1.51it/s]

buffer size = 15102, epsilon = 0.09633
mean_reward :  0.0


  0%|          | 7352/2000001 [1:14:49<369:21:06,  1.50it/s]

buffer size = 15104, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7353/2000001 [1:14:50<373:25:07,  1.48it/s]

buffer size = 15106, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7354/2000001 [1:14:51<385:22:00,  1.44it/s]

buffer size = 15108, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7355/2000001 [1:14:51<406:38:10,  1.36it/s]

buffer size = 15110, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7356/2000001 [1:14:52<414:06:47,  1.34it/s]

buffer size = 15112, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7357/2000001 [1:14:53<420:41:01,  1.32it/s]

buffer size = 15114, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7358/2000001 [1:14:54<431:54:09,  1.28it/s]

buffer size = 15116, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7359/2000001 [1:14:55<434:11:50,  1.27it/s]

buffer size = 15118, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7360/2000001 [1:14:55<428:06:18,  1.29it/s]

buffer size = 15120, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7361/2000001 [1:14:56<421:20:48,  1.31it/s]

buffer size = 15122, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7362/2000001 [1:14:57<422:19:06,  1.31it/s]

buffer size = 15124, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7363/2000001 [1:14:57<406:30:30,  1.36it/s]

buffer size = 15126, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7364/2000001 [1:14:58<395:23:30,  1.40it/s]

buffer size = 15128, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7365/2000001 [1:14:59<389:42:15,  1.42it/s]

buffer size = 15130, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7366/2000001 [1:14:59<386:30:14,  1.43it/s]

buffer size = 15132, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7367/2000001 [1:15:00<383:08:41,  1.44it/s]

buffer size = 15134, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7368/2000001 [1:15:01<377:58:12,  1.46it/s]

buffer size = 15136, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7369/2000001 [1:15:02<379:45:12,  1.46it/s]

buffer size = 15138, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7370/2000001 [1:15:02<379:01:15,  1.46it/s]

buffer size = 15140, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7371/2000001 [1:15:03<382:05:37,  1.45it/s]

buffer size = 15142, epsilon = 0.09632
mean_reward :  0.0


  0%|          | 7372/2000001 [1:15:04<377:38:41,  1.47it/s]

buffer size = 15144, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7373/2000001 [1:15:04<395:25:06,  1.40it/s]

buffer size = 15146, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7374/2000001 [1:15:05<410:45:35,  1.35it/s]

buffer size = 15148, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7375/2000001 [1:15:06<415:25:52,  1.33it/s]

buffer size = 15150, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7376/2000001 [1:15:07<428:46:22,  1.29it/s]

buffer size = 15152, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7377/2000001 [1:15:08<438:48:05,  1.26it/s]

buffer size = 15154, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7378/2000001 [1:15:08<432:39:20,  1.28it/s]

buffer size = 15156, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7379/2000001 [1:15:09<431:08:18,  1.28it/s]

buffer size = 15158, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7380/2000001 [1:15:10<425:09:16,  1.30it/s]

buffer size = 15160, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7381/2000001 [1:15:11<418:35:06,  1.32it/s]

buffer size = 15162, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7382/2000001 [1:15:11<406:50:28,  1.36it/s]

buffer size = 15164, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7383/2000001 [1:15:12<394:18:35,  1.40it/s]

buffer size = 15166, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7384/2000001 [1:15:13<386:25:04,  1.43it/s]

buffer size = 15168, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7385/2000001 [1:15:13<385:32:42,  1.44it/s]

buffer size = 15170, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7386/2000001 [1:15:14<380:11:06,  1.46it/s]

buffer size = 15172, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7387/2000001 [1:15:15<378:38:47,  1.46it/s]

buffer size = 15174, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7388/2000001 [1:15:15<379:34:33,  1.46it/s]

buffer size = 15176, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7389/2000001 [1:15:16<372:50:54,  1.48it/s]

buffer size = 15178, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7390/2000001 [1:15:17<370:02:53,  1.50it/s]

buffer size = 15180, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7391/2000001 [1:15:17<374:49:11,  1.48it/s]

buffer size = 15182, epsilon = 0.09631
mean_reward :  0.0


  0%|          | 7392/2000001 [1:15:18<393:36:58,  1.41it/s]

buffer size = 15184, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7393/2000001 [1:15:19<412:28:04,  1.34it/s]

buffer size = 15186, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7394/2000001 [1:15:20<420:59:58,  1.31it/s]

buffer size = 15188, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7395/2000001 [1:15:21<431:45:46,  1.28it/s]

buffer size = 15190, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7396/2000001 [1:15:21<448:56:13,  1.23it/s]

buffer size = 15192, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7397/2000001 [1:15:22<439:44:48,  1.26it/s]

buffer size = 15194, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7398/2000001 [1:15:23<432:21:25,  1.28it/s]

buffer size = 15196, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7399/2000001 [1:15:24<422:44:07,  1.31it/s]

buffer size = 15198, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7400/2000001 [1:15:24<409:07:06,  1.35it/s]

buffer size = 15200, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7401/2000001 [1:15:25<398:01:04,  1.39it/s]

buffer size = 15202, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7402/2000001 [1:15:26<392:02:17,  1.41it/s]

buffer size = 15204, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7403/2000001 [1:15:26<386:27:35,  1.43it/s]

buffer size = 15206, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7404/2000001 [1:15:27<381:09:10,  1.45it/s]

buffer size = 15208, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7405/2000001 [1:15:28<381:49:13,  1.45it/s]

buffer size = 15210, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7406/2000001 [1:15:28<381:24:09,  1.45it/s]

buffer size = 15212, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7407/2000001 [1:15:29<381:39:18,  1.45it/s]

buffer size = 15214, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7408/2000001 [1:15:30<378:09:47,  1.46it/s]

buffer size = 15216, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7409/2000001 [1:15:30<375:57:10,  1.47it/s]

buffer size = 15218, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7410/2000001 [1:15:31<377:42:32,  1.47it/s]

buffer size = 15220, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7411/2000001 [1:15:32<392:36:01,  1.41it/s]

buffer size = 15222, epsilon = 0.09630
mean_reward :  0.0


  0%|          | 7412/2000001 [1:15:33<408:10:26,  1.36it/s]

buffer size = 15224, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7413/2000001 [1:15:34<419:35:55,  1.32it/s]

buffer size = 15226, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7414/2000001 [1:15:34<441:32:04,  1.25it/s]

buffer size = 15228, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7415/2000001 [1:15:35<448:06:13,  1.24it/s]

buffer size = 15230, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7416/2000001 [1:15:36<437:53:09,  1.26it/s]

buffer size = 15232, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7417/2000001 [1:15:37<434:35:44,  1.27it/s]

buffer size = 15234, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7418/2000001 [1:15:38<428:55:10,  1.29it/s]

buffer size = 15236, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7419/2000001 [1:15:38<408:39:58,  1.35it/s]

buffer size = 15238, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7420/2000001 [1:15:39<397:20:36,  1.39it/s]

buffer size = 15240, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7421/2000001 [1:15:40<391:58:50,  1.41it/s]

buffer size = 15242, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7422/2000001 [1:15:40<389:57:44,  1.42it/s]

buffer size = 15244, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7423/2000001 [1:15:41<383:32:54,  1.44it/s]

buffer size = 15246, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7424/2000001 [1:15:42<381:12:30,  1.45it/s]

buffer size = 15248, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7425/2000001 [1:15:42<378:53:28,  1.46it/s]

buffer size = 15250, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7426/2000001 [1:15:43<383:08:52,  1.44it/s]

buffer size = 15252, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7427/2000001 [1:15:44<378:01:23,  1.46it/s]

buffer size = 15254, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7428/2000001 [1:15:44<375:49:30,  1.47it/s]

buffer size = 15256, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7429/2000001 [1:15:45<389:45:01,  1.42it/s]

buffer size = 15258, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7430/2000001 [1:15:46<429:25:19,  1.29it/s]

buffer size = 15260, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7431/2000001 [1:15:47<464:44:22,  1.19it/s]

buffer size = 15262, epsilon = 0.09629
mean_reward :  0.0


  0%|          | 7432/2000001 [1:15:48<472:30:07,  1.17it/s]

buffer size = 15264, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7433/2000001 [1:15:49<453:03:21,  1.22it/s]

buffer size = 15266, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7434/2000001 [1:15:49<435:28:33,  1.27it/s]

buffer size = 15268, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7435/2000001 [1:15:50<411:47:14,  1.34it/s]

buffer size = 15270, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7436/2000001 [1:15:51<402:20:46,  1.38it/s]

buffer size = 15272, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7437/2000001 [1:15:51<393:38:33,  1.41it/s]

buffer size = 15274, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7438/2000001 [1:15:52<387:15:01,  1.43it/s]

buffer size = 15276, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7439/2000001 [1:15:53<387:47:43,  1.43it/s]

buffer size = 15278, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7440/2000001 [1:15:53<382:42:16,  1.45it/s]

buffer size = 15280, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7441/2000001 [1:15:54<382:48:37,  1.45it/s]

buffer size = 15282, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7442/2000001 [1:15:55<381:06:17,  1.45it/s]

buffer size = 15284, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7443/2000001 [1:15:55<378:06:52,  1.46it/s]

buffer size = 15286, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7444/2000001 [1:15:56<375:09:17,  1.48it/s]

buffer size = 15288, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7445/2000001 [1:15:57<375:54:57,  1.47it/s]

buffer size = 15290, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7446/2000001 [1:15:57<375:50:58,  1.47it/s]

buffer size = 15292, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7447/2000001 [1:15:58<392:54:50,  1.41it/s]

buffer size = 15294, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7448/2000001 [1:15:59<411:56:56,  1.34it/s]

buffer size = 15296, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7449/2000001 [1:16:00<416:28:41,  1.33it/s]

buffer size = 15298, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7450/2000001 [1:16:01<425:18:28,  1.30it/s]

buffer size = 15300, epsilon = 0.09628
mean_reward :  0.0


  0%|          | 7451/2000001 [1:16:01<434:05:00,  1.28it/s]

buffer size = 15302, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7452/2000001 [1:16:02<437:05:27,  1.27it/s]

buffer size = 15304, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7453/2000001 [1:16:03<430:36:09,  1.29it/s]

buffer size = 15306, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7454/2000001 [1:16:04<423:12:42,  1.31it/s]

buffer size = 15308, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7455/2000001 [1:16:05<421:30:21,  1.31it/s]

buffer size = 15310, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7456/2000001 [1:16:05<415:38:25,  1.33it/s]

buffer size = 15312, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7457/2000001 [1:16:06<400:42:12,  1.38it/s]

buffer size = 15314, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7458/2000001 [1:16:07<395:20:56,  1.40it/s]

buffer size = 15316, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7459/2000001 [1:16:07<390:10:57,  1.42it/s]

buffer size = 15318, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7460/2000001 [1:16:08<385:28:20,  1.44it/s]

buffer size = 15320, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7461/2000001 [1:16:09<381:03:17,  1.45it/s]

buffer size = 15322, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7462/2000001 [1:16:09<378:57:31,  1.46it/s]

buffer size = 15324, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7463/2000001 [1:16:10<380:14:59,  1.46it/s]

buffer size = 15326, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7464/2000001 [1:16:11<378:46:30,  1.46it/s]

buffer size = 15328, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7465/2000001 [1:16:11<374:26:56,  1.48it/s]

buffer size = 15330, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7466/2000001 [1:16:12<388:12:18,  1.43it/s]

buffer size = 15332, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7467/2000001 [1:16:13<411:27:16,  1.35it/s]

buffer size = 15334, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7468/2000001 [1:16:14<423:26:14,  1.31it/s]

buffer size = 15336, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7469/2000001 [1:16:15<429:42:49,  1.29it/s]

buffer size = 15338, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7470/2000001 [1:16:15<446:09:19,  1.24it/s]

buffer size = 15340, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7471/2000001 [1:16:16<439:12:05,  1.26it/s]

buffer size = 15342, epsilon = 0.09627
mean_reward :  0.0


  0%|          | 7472/2000001 [1:16:17<435:17:35,  1.27it/s]

buffer size = 15344, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7473/2000001 [1:16:18<424:09:28,  1.30it/s]

buffer size = 15346, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7474/2000001 [1:16:18<412:08:49,  1.34it/s]

buffer size = 15348, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7475/2000001 [1:16:19<400:07:04,  1.38it/s]

buffer size = 15350, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7476/2000001 [1:16:20<391:38:33,  1.41it/s]

buffer size = 15352, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7477/2000001 [1:16:20<388:12:56,  1.43it/s]

buffer size = 15354, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7478/2000001 [1:16:21<381:43:24,  1.45it/s]

buffer size = 15356, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7479/2000001 [1:16:22<376:35:52,  1.47it/s]

buffer size = 15358, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7480/2000001 [1:16:22<378:13:03,  1.46it/s]

buffer size = 15360, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7481/2000001 [1:16:23<373:27:16,  1.48it/s]

buffer size = 15362, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7482/2000001 [1:16:24<372:57:38,  1.48it/s]

buffer size = 15364, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7483/2000001 [1:16:24<373:11:29,  1.48it/s]

buffer size = 15366, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7484/2000001 [1:16:25<375:36:55,  1.47it/s]

buffer size = 15368, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7485/2000001 [1:16:26<395:09:54,  1.40it/s]

buffer size = 15370, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7486/2000001 [1:16:27<424:27:33,  1.30it/s]

buffer size = 15372, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7487/2000001 [1:16:28<440:31:16,  1.26it/s]

buffer size = 15374, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7488/2000001 [1:16:29<452:11:19,  1.22it/s]

buffer size = 15376, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7489/2000001 [1:16:29<456:01:55,  1.21it/s]

buffer size = 15378, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7490/2000001 [1:16:30<443:54:43,  1.25it/s]

buffer size = 15380, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7491/2000001 [1:16:31<426:03:43,  1.30it/s]

buffer size = 15382, epsilon = 0.09626
mean_reward :  0.0


  0%|          | 7492/2000001 [1:16:31<411:08:58,  1.35it/s]

buffer size = 15384, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7493/2000001 [1:16:32<406:09:45,  1.36it/s]

buffer size = 15386, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7494/2000001 [1:16:33<396:37:22,  1.40it/s]

buffer size = 15388, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7495/2000001 [1:16:34<387:24:27,  1.43it/s]

buffer size = 15390, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7496/2000001 [1:16:34<390:11:29,  1.42it/s]

buffer size = 15392, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7497/2000001 [1:16:35<388:14:01,  1.43it/s]

buffer size = 15394, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7498/2000001 [1:16:36<382:20:01,  1.45it/s]

buffer size = 15396, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7499/2000001 [1:16:36<384:49:09,  1.44it/s]

buffer size = 15398, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7500/2000001 [1:16:37<387:11:37,  1.43it/s]

buffer size = 15400, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7501/2000001 [1:16:38<384:14:31,  1.44it/s]

buffer size = 15402, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7502/2000001 [1:16:38<381:44:47,  1.45it/s]

buffer size = 15404, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7503/2000001 [1:16:39<388:38:37,  1.42it/s]

buffer size = 15406, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7504/2000001 [1:16:40<415:01:59,  1.33it/s]

buffer size = 15408, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7505/2000001 [1:16:41<430:04:33,  1.29it/s]

buffer size = 15410, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7506/2000001 [1:16:42<448:34:51,  1.23it/s]

buffer size = 15412, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7507/2000001 [1:16:43<463:17:37,  1.19it/s]

buffer size = 15414, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7508/2000001 [1:16:43<453:58:28,  1.22it/s]

buffer size = 15416, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7509/2000001 [1:16:44<442:59:46,  1.25it/s]

buffer size = 15418, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7510/2000001 [1:16:45<428:10:40,  1.29it/s]

buffer size = 15420, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7511/2000001 [1:16:46<419:06:59,  1.32it/s]

buffer size = 15422, epsilon = 0.09625
mean_reward :  0.0


  0%|          | 7512/2000001 [1:16:46<402:18:40,  1.38it/s]

buffer size = 15424, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7513/2000001 [1:16:47<397:46:30,  1.39it/s]

buffer size = 15426, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7514/2000001 [1:16:48<389:58:13,  1.42it/s]

buffer size = 15428, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7515/2000001 [1:16:48<387:58:00,  1.43it/s]

buffer size = 15430, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7516/2000001 [1:16:49<385:00:29,  1.44it/s]

buffer size = 15432, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7517/2000001 [1:16:50<387:28:29,  1.43it/s]

buffer size = 15434, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7518/2000001 [1:16:50<383:05:16,  1.44it/s]

buffer size = 15436, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7519/2000001 [1:16:51<376:36:18,  1.47it/s]

buffer size = 15438, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7520/2000001 [1:16:52<374:53:34,  1.48it/s]

buffer size = 15440, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7521/2000001 [1:16:52<378:52:47,  1.46it/s]

buffer size = 15442, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7522/2000001 [1:16:53<406:48:11,  1.36it/s]

buffer size = 15444, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7523/2000001 [1:16:54<415:54:41,  1.33it/s]

buffer size = 15446, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7524/2000001 [1:16:55<421:24:39,  1.31it/s]

buffer size = 15448, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7525/2000001 [1:16:56<438:47:32,  1.26it/s]

buffer size = 15450, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7526/2000001 [1:16:57<447:24:01,  1.24it/s]

buffer size = 15452, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7527/2000001 [1:16:57<439:53:14,  1.26it/s]

buffer size = 15454, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7528/2000001 [1:16:58<432:29:14,  1.28it/s]

buffer size = 15456, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7529/2000001 [1:16:59<424:17:38,  1.30it/s]

buffer size = 15458, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7530/2000001 [1:16:59<413:16:14,  1.34it/s]

buffer size = 15460, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7531/2000001 [1:17:00<403:41:44,  1.37it/s]

buffer size = 15462, epsilon = 0.09624
mean_reward :  0.0


  0%|          | 7532/2000001 [1:17:01<393:40:17,  1.41it/s]

buffer size = 15464, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7533/2000001 [1:17:02<391:18:36,  1.41it/s]

buffer size = 15466, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7534/2000001 [1:17:02<386:35:15,  1.43it/s]

buffer size = 15468, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7535/2000001 [1:17:03<386:21:39,  1.43it/s]

buffer size = 15470, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7536/2000001 [1:17:04<382:02:33,  1.45it/s]

buffer size = 15472, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7537/2000001 [1:17:04<381:48:43,  1.45it/s]

buffer size = 15474, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7538/2000001 [1:17:05<377:34:46,  1.47it/s]

buffer size = 15476, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7539/2000001 [1:17:06<378:38:48,  1.46it/s]

buffer size = 15478, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7540/2000001 [1:17:06<375:30:07,  1.47it/s]

buffer size = 15480, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7541/2000001 [1:17:07<403:16:42,  1.37it/s]

buffer size = 15482, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7542/2000001 [1:17:08<414:05:17,  1.34it/s]

buffer size = 15484, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7543/2000001 [1:17:09<416:13:51,  1.33it/s]

buffer size = 15486, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7544/2000001 [1:17:10<455:25:46,  1.22it/s]

buffer size = 15488, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7545/2000001 [1:17:10<444:49:39,  1.24it/s]

buffer size = 15490, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7546/2000001 [1:17:11<437:31:06,  1.26it/s]

buffer size = 15492, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7547/2000001 [1:17:12<433:27:16,  1.28it/s]

buffer size = 15494, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7548/2000001 [1:17:13<426:54:55,  1.30it/s]

buffer size = 15496, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7549/2000001 [1:17:13<410:39:23,  1.35it/s]

buffer size = 15498, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7550/2000001 [1:17:14<400:23:47,  1.38it/s]

buffer size = 15500, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7551/2000001 [1:17:15<390:47:03,  1.42it/s]

buffer size = 15502, epsilon = 0.09623
mean_reward :  0.0


  0%|          | 7552/2000001 [1:17:15<390:20:08,  1.42it/s]

buffer size = 15504, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7553/2000001 [1:17:16<383:43:24,  1.44it/s]

buffer size = 15506, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7554/2000001 [1:17:17<383:18:31,  1.44it/s]

buffer size = 15508, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7555/2000001 [1:17:17<381:24:51,  1.45it/s]

buffer size = 15510, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7556/2000001 [1:17:18<375:44:10,  1.47it/s]

buffer size = 15512, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7557/2000001 [1:17:19<378:58:00,  1.46it/s]

buffer size = 15514, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7558/2000001 [1:17:19<380:17:08,  1.46it/s]

buffer size = 15516, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7559/2000001 [1:17:20<394:30:10,  1.40it/s]

buffer size = 15518, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7560/2000001 [1:17:21<434:40:41,  1.27it/s]

buffer size = 15520, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7561/2000001 [1:17:22<437:53:38,  1.26it/s]

buffer size = 15522, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7562/2000001 [1:17:23<449:18:30,  1.23it/s]

buffer size = 15524, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7563/2000001 [1:17:24<452:19:34,  1.22it/s]

buffer size = 15526, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7564/2000001 [1:17:24<443:59:38,  1.25it/s]

buffer size = 15528, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7565/2000001 [1:17:25<425:51:23,  1.30it/s]

buffer size = 15530, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7566/2000001 [1:17:26<409:34:26,  1.35it/s]

buffer size = 15532, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7567/2000001 [1:17:27<400:11:41,  1.38it/s]

buffer size = 15534, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7568/2000001 [1:17:27<392:28:15,  1.41it/s]

buffer size = 15536, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7569/2000001 [1:17:28<393:08:16,  1.41it/s]

buffer size = 15538, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7570/2000001 [1:17:29<387:06:51,  1.43it/s]

buffer size = 15540, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7571/2000001 [1:17:29<387:02:00,  1.43it/s]

buffer size = 15542, epsilon = 0.09622
mean_reward :  0.0


  0%|          | 7572/2000001 [1:17:30<386:20:49,  1.43it/s]

buffer size = 15544, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7573/2000001 [1:17:31<386:05:39,  1.43it/s]

buffer size = 15546, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7574/2000001 [1:17:31<382:15:35,  1.45it/s]

buffer size = 15548, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7575/2000001 [1:17:32<380:50:17,  1.45it/s]

buffer size = 15550, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7576/2000001 [1:17:33<382:36:45,  1.45it/s]

buffer size = 15552, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7577/2000001 [1:17:33<386:54:19,  1.43it/s]

buffer size = 15554, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7578/2000001 [1:17:34<425:17:52,  1.30it/s]

buffer size = 15556, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7579/2000001 [1:17:35<428:48:46,  1.29it/s]

buffer size = 15558, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7580/2000001 [1:17:36<442:15:31,  1.25it/s]

buffer size = 15560, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7581/2000001 [1:17:37<452:05:54,  1.22it/s]

buffer size = 15562, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7582/2000001 [1:17:38<446:03:10,  1.24it/s]

buffer size = 15564, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7583/2000001 [1:17:38<439:56:29,  1.26it/s]

buffer size = 15566, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7584/2000001 [1:17:39<429:07:16,  1.29it/s]

buffer size = 15568, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7585/2000001 [1:17:40<417:38:49,  1.33it/s]

buffer size = 15570, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7586/2000001 [1:17:41<413:28:47,  1.34it/s]

buffer size = 15572, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7587/2000001 [1:17:41<399:42:00,  1.38it/s]

buffer size = 15574, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7588/2000001 [1:17:42<397:14:24,  1.39it/s]

buffer size = 15576, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7589/2000001 [1:17:43<390:42:11,  1.42it/s]

buffer size = 15578, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7590/2000001 [1:17:43<386:49:36,  1.43it/s]

buffer size = 15580, epsilon = 0.09621
mean_reward :  0.0


  0%|          | 7591/2000001 [1:17:44<385:05:03,  1.44it/s]

buffer size = 15582, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7592/2000001 [1:17:45<379:24:33,  1.46it/s]

buffer size = 15584, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7593/2000001 [1:17:45<380:07:53,  1.46it/s]

buffer size = 15586, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7594/2000001 [1:17:46<380:33:43,  1.45it/s]

buffer size = 15588, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7595/2000001 [1:17:47<381:46:32,  1.45it/s]

buffer size = 15590, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7596/2000001 [1:17:48<398:10:37,  1.39it/s]

buffer size = 15592, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7597/2000001 [1:17:48<413:42:06,  1.34it/s]

buffer size = 15594, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7598/2000001 [1:17:49<421:37:18,  1.31it/s]

buffer size = 15596, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7599/2000001 [1:17:50<431:47:28,  1.28it/s]

buffer size = 15598, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7600/2000001 [1:17:51<452:30:39,  1.22it/s]

buffer size = 15600, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7601/2000001 [1:17:52<447:49:19,  1.24it/s]

buffer size = 15602, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7602/2000001 [1:17:52<438:40:43,  1.26it/s]

buffer size = 15604, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7603/2000001 [1:17:53<418:22:01,  1.32it/s]

buffer size = 15606, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7604/2000001 [1:17:54<405:20:04,  1.37it/s]

buffer size = 15608, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7605/2000001 [1:17:54<398:42:36,  1.39it/s]

buffer size = 15610, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7606/2000001 [1:17:55<394:20:25,  1.40it/s]

buffer size = 15612, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7607/2000001 [1:17:56<387:56:21,  1.43it/s]

buffer size = 15614, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7608/2000001 [1:17:57<387:21:30,  1.43it/s]

buffer size = 15616, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7609/2000001 [1:17:57<382:38:23,  1.45it/s]

buffer size = 15618, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7610/2000001 [1:17:58<383:45:42,  1.44it/s]

buffer size = 15620, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7611/2000001 [1:17:59<382:32:05,  1.45it/s]

buffer size = 15622, epsilon = 0.09620
mean_reward :  0.0


  0%|          | 7612/2000001 [1:17:59<378:42:13,  1.46it/s]

buffer size = 15624, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7613/2000001 [1:18:00<383:14:49,  1.44it/s]

buffer size = 15626, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7614/2000001 [1:18:01<382:45:37,  1.45it/s]

buffer size = 15628, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7615/2000001 [1:18:01<407:12:06,  1.36it/s]

buffer size = 15630, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7616/2000001 [1:18:02<418:23:47,  1.32it/s]

buffer size = 15632, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7617/2000001 [1:18:03<420:38:56,  1.32it/s]

buffer size = 15634, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7618/2000001 [1:18:04<446:55:08,  1.24it/s]

buffer size = 15636, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7619/2000001 [1:18:05<450:40:18,  1.23it/s]

buffer size = 15638, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7620/2000001 [1:18:06<444:53:10,  1.24it/s]

buffer size = 15640, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7621/2000001 [1:18:06<437:40:11,  1.26it/s]

buffer size = 15642, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7622/2000001 [1:18:07<422:43:25,  1.31it/s]

buffer size = 15644, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7623/2000001 [1:18:08<414:16:18,  1.34it/s]

buffer size = 15646, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7624/2000001 [1:18:08<402:03:24,  1.38it/s]

buffer size = 15648, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7625/2000001 [1:18:09<394:39:13,  1.40it/s]

buffer size = 15650, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7626/2000001 [1:18:10<392:21:48,  1.41it/s]

buffer size = 15652, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7627/2000001 [1:18:11<387:33:29,  1.43it/s]

buffer size = 15654, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7628/2000001 [1:18:11<382:20:59,  1.45it/s]

buffer size = 15656, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7629/2000001 [1:18:12<383:24:47,  1.44it/s]

buffer size = 15658, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7630/2000001 [1:18:13<381:52:48,  1.45it/s]

buffer size = 15660, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7631/2000001 [1:18:13<377:59:24,  1.46it/s]

buffer size = 15662, epsilon = 0.09619
mean_reward :  0.0


  0%|          | 7632/2000001 [1:18:14<377:10:16,  1.47it/s]

buffer size = 15664, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7633/2000001 [1:18:15<391:14:48,  1.41it/s]

buffer size = 15666, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7634/2000001 [1:18:16<413:11:46,  1.34it/s]

buffer size = 15668, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7635/2000001 [1:18:16<417:13:15,  1.33it/s]

buffer size = 15670, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7636/2000001 [1:18:17<422:54:47,  1.31it/s]

buffer size = 15672, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7637/2000001 [1:18:18<434:49:51,  1.27it/s]

buffer size = 15674, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7638/2000001 [1:18:19<444:28:43,  1.25it/s]

buffer size = 15676, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7639/2000001 [1:18:20<439:47:57,  1.26it/s]

buffer size = 15678, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7640/2000001 [1:18:20<434:39:05,  1.27it/s]

buffer size = 15680, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7641/2000001 [1:18:21<420:40:10,  1.32it/s]

buffer size = 15682, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7642/2000001 [1:18:22<405:54:41,  1.36it/s]

buffer size = 15684, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7643/2000001 [1:18:22<394:47:19,  1.40it/s]

buffer size = 15686, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7644/2000001 [1:18:23<390:27:39,  1.42it/s]

buffer size = 15688, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7645/2000001 [1:18:24<385:28:31,  1.44it/s]

buffer size = 15690, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7646/2000001 [1:18:24<386:43:42,  1.43it/s]

buffer size = 15692, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7647/2000001 [1:18:25<377:39:48,  1.47it/s]

buffer size = 15694, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7648/2000001 [1:18:26<381:19:20,  1.45it/s]

buffer size = 15696, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7649/2000001 [1:18:26<379:56:57,  1.46it/s]

buffer size = 15698, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7650/2000001 [1:18:27<376:46:54,  1.47it/s]

buffer size = 15700, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7651/2000001 [1:18:28<380:09:07,  1.46it/s]

buffer size = 15702, epsilon = 0.09618
mean_reward :  0.0


  0%|          | 7652/2000001 [1:18:28<378:47:44,  1.46it/s]

buffer size = 15704, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7653/2000001 [1:18:29<420:35:10,  1.32it/s]

buffer size = 15706, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7654/2000001 [1:18:30<431:03:54,  1.28it/s]

buffer size = 15708, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7655/2000001 [1:18:31<447:44:02,  1.24it/s]

buffer size = 15710, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7656/2000001 [1:18:32<463:35:57,  1.19it/s]

buffer size = 15712, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7657/2000001 [1:18:33<450:51:01,  1.23it/s]

buffer size = 15714, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7658/2000001 [1:18:34<440:39:25,  1.26it/s]

buffer size = 15716, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7659/2000001 [1:18:34<428:05:53,  1.29it/s]

buffer size = 15718, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7660/2000001 [1:18:35<415:50:28,  1.33it/s]

buffer size = 15720, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7661/2000001 [1:18:36<402:44:42,  1.37it/s]

buffer size = 15722, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7662/2000001 [1:18:36<398:03:05,  1.39it/s]

buffer size = 15724, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7663/2000001 [1:18:37<399:05:19,  1.39it/s]

buffer size = 15726, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7664/2000001 [1:18:38<388:14:55,  1.43it/s]

buffer size = 15728, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7665/2000001 [1:18:38<392:11:21,  1.41it/s]

buffer size = 15730, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7666/2000001 [1:18:39<388:18:10,  1.43it/s]

buffer size = 15732, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7667/2000001 [1:18:40<381:56:40,  1.45it/s]

buffer size = 15734, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7668/2000001 [1:18:40<386:51:30,  1.43it/s]

buffer size = 15736, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7669/2000001 [1:18:41<379:01:39,  1.46it/s]

buffer size = 15738, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7670/2000001 [1:18:42<383:04:27,  1.44it/s]

buffer size = 15740, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7671/2000001 [1:18:43<403:20:59,  1.37it/s]

buffer size = 15742, epsilon = 0.09617
mean_reward :  0.0


  0%|          | 7672/2000001 [1:18:43<419:51:29,  1.32it/s]

buffer size = 15744, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7673/2000001 [1:18:44<420:21:39,  1.32it/s]

buffer size = 15746, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7674/2000001 [1:18:45<426:49:28,  1.30it/s]

buffer size = 15748, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7675/2000001 [1:18:46<440:58:41,  1.25it/s]

buffer size = 15750, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7676/2000001 [1:18:47<433:18:49,  1.28it/s]

buffer size = 15752, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7677/2000001 [1:18:47<425:37:18,  1.30it/s]

buffer size = 15754, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7678/2000001 [1:18:48<420:40:57,  1.32it/s]

buffer size = 15756, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7679/2000001 [1:18:49<421:09:31,  1.31it/s]

buffer size = 15758, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7680/2000001 [1:18:50<415:39:28,  1.33it/s]

buffer size = 15760, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7681/2000001 [1:18:50<404:25:02,  1.37it/s]

buffer size = 15762, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7682/2000001 [1:18:51<396:06:00,  1.40it/s]

buffer size = 15764, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7683/2000001 [1:18:52<394:43:20,  1.40it/s]

buffer size = 15766, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7684/2000001 [1:18:52<391:25:29,  1.41it/s]

buffer size = 15768, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7685/2000001 [1:18:53<390:04:27,  1.42it/s]

buffer size = 15770, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7686/2000001 [1:18:54<379:50:11,  1.46it/s]

buffer size = 15772, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7687/2000001 [1:18:54<386:24:38,  1.43it/s]

buffer size = 15774, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7688/2000001 [1:18:55<385:11:49,  1.44it/s]

buffer size = 15776, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7689/2000001 [1:18:56<382:29:25,  1.45it/s]

buffer size = 15778, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7690/2000001 [1:18:57<413:59:35,  1.34it/s]

buffer size = 15780, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7691/2000001 [1:18:58<432:55:46,  1.28it/s]

buffer size = 15782, epsilon = 0.09616
mean_reward :  0.0


  0%|          | 7692/2000001 [1:18:59<456:43:46,  1.21it/s]

buffer size = 15784, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7693/2000001 [1:18:59<467:53:55,  1.18it/s]

buffer size = 15786, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7694/2000001 [1:19:00<454:05:34,  1.22it/s]

buffer size = 15788, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7695/2000001 [1:19:01<448:11:23,  1.23it/s]

buffer size = 15790, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7696/2000001 [1:19:02<431:53:18,  1.28it/s]

buffer size = 15792, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7697/2000001 [1:19:02<416:44:26,  1.33it/s]

buffer size = 15794, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7698/2000001 [1:19:03<407:30:27,  1.36it/s]

buffer size = 15796, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7699/2000001 [1:19:04<404:14:18,  1.37it/s]

buffer size = 15798, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7700/2000001 [1:19:04<398:55:51,  1.39it/s]

buffer size = 15800, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7701/2000001 [1:19:05<398:56:58,  1.39it/s]

buffer size = 15802, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7702/2000001 [1:19:06<393:39:56,  1.41it/s]

buffer size = 15804, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7703/2000001 [1:19:07<387:42:03,  1.43it/s]

buffer size = 15806, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7704/2000001 [1:19:07<386:18:23,  1.43it/s]

buffer size = 15808, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7705/2000001 [1:19:08<392:22:32,  1.41it/s]

buffer size = 15810, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7706/2000001 [1:19:09<385:21:16,  1.44it/s]

buffer size = 15812, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7707/2000001 [1:19:09<398:34:58,  1.39it/s]

buffer size = 15814, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7708/2000001 [1:19:10<414:21:37,  1.34it/s]

buffer size = 15816, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7709/2000001 [1:19:11<426:49:36,  1.30it/s]

buffer size = 15818, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7710/2000001 [1:19:12<451:40:06,  1.23it/s]

buffer size = 15820, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7711/2000001 [1:19:13<462:03:07,  1.20it/s]

buffer size = 15822, epsilon = 0.09615
mean_reward :  0.0


  0%|          | 7712/2000001 [1:19:14<448:55:53,  1.23it/s]

buffer size = 15824, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7713/2000001 [1:19:14<442:58:02,  1.25it/s]

buffer size = 15826, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7714/2000001 [1:19:15<431:17:24,  1.28it/s]

buffer size = 15828, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7715/2000001 [1:19:16<420:29:49,  1.32it/s]

buffer size = 15830, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7716/2000001 [1:19:17<409:44:02,  1.35it/s]

buffer size = 15832, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7717/2000001 [1:19:17<400:33:48,  1.38it/s]

buffer size = 15834, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7718/2000001 [1:19:18<397:05:25,  1.39it/s]

buffer size = 15836, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7719/2000001 [1:19:19<391:54:21,  1.41it/s]

buffer size = 15838, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7720/2000001 [1:19:19<394:22:52,  1.40it/s]

buffer size = 15840, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7721/2000001 [1:19:20<389:17:51,  1.42it/s]

buffer size = 15842, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7722/2000001 [1:19:21<384:56:15,  1.44it/s]

buffer size = 15844, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7723/2000001 [1:19:21<383:45:14,  1.44it/s]

buffer size = 15846, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7724/2000001 [1:19:22<387:19:56,  1.43it/s]

buffer size = 15848, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7725/2000001 [1:19:23<397:55:16,  1.39it/s]

buffer size = 15850, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7726/2000001 [1:19:24<413:11:42,  1.34it/s]

buffer size = 15852, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7727/2000001 [1:19:24<419:15:04,  1.32it/s]

buffer size = 15854, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7728/2000001 [1:19:25<440:00:41,  1.26it/s]

buffer size = 15856, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7729/2000001 [1:19:26<450:30:04,  1.23it/s]

buffer size = 15858, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7730/2000001 [1:19:27<461:17:26,  1.20it/s]

buffer size = 15860, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7731/2000001 [1:19:28<454:20:32,  1.22it/s]

buffer size = 15862, epsilon = 0.09614
mean_reward :  0.0


  0%|          | 7732/2000001 [1:19:29<445:48:52,  1.24it/s]

buffer size = 15864, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7733/2000001 [1:19:29<440:42:34,  1.26it/s]

buffer size = 15866, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7734/2000001 [1:19:30<422:36:54,  1.31it/s]

buffer size = 15868, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7735/2000001 [1:19:31<417:59:06,  1.32it/s]

buffer size = 15870, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7736/2000001 [1:19:32<408:21:42,  1.36it/s]

buffer size = 15872, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7737/2000001 [1:19:32<400:00:41,  1.38it/s]

buffer size = 15874, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7738/2000001 [1:19:33<398:38:58,  1.39it/s]

buffer size = 15876, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7739/2000001 [1:19:34<394:26:41,  1.40it/s]

buffer size = 15878, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7740/2000001 [1:19:34<389:02:44,  1.42it/s]

buffer size = 15880, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7741/2000001 [1:19:35<392:47:16,  1.41it/s]

buffer size = 15882, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7742/2000001 [1:19:36<388:03:10,  1.43it/s]

buffer size = 15884, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7743/2000001 [1:19:36<396:23:59,  1.40it/s]

buffer size = 15886, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7744/2000001 [1:19:37<427:11:09,  1.30it/s]

buffer size = 15888, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7745/2000001 [1:19:38<462:49:25,  1.20it/s]

buffer size = 15890, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7746/2000001 [1:19:39<478:59:48,  1.16it/s]

buffer size = 15892, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7747/2000001 [1:19:40<463:07:30,  1.19it/s]

buffer size = 15894, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7748/2000001 [1:19:41<443:59:42,  1.25it/s]

buffer size = 15896, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7749/2000001 [1:19:41<426:06:27,  1.30it/s]

buffer size = 15898, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7750/2000001 [1:19:42<415:33:48,  1.33it/s]

buffer size = 15900, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7751/2000001 [1:19:43<408:26:55,  1.35it/s]

buffer size = 15902, epsilon = 0.09613
mean_reward :  0.0


  0%|          | 7752/2000001 [1:19:44<401:17:38,  1.38it/s]

buffer size = 15904, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7753/2000001 [1:19:44<394:32:01,  1.40it/s]

buffer size = 15906, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7754/2000001 [1:19:45<384:45:20,  1.44it/s]

buffer size = 15908, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7755/2000001 [1:19:46<385:39:05,  1.43it/s]

buffer size = 15910, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7756/2000001 [1:19:46<388:40:53,  1.42it/s]

buffer size = 15912, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7757/2000001 [1:19:47<383:45:12,  1.44it/s]

buffer size = 15914, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7758/2000001 [1:19:48<378:43:32,  1.46it/s]

buffer size = 15916, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7759/2000001 [1:19:48<377:31:19,  1.47it/s]

buffer size = 15918, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7760/2000001 [1:19:49<377:36:10,  1.47it/s]

buffer size = 15920, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7761/2000001 [1:19:50<413:07:38,  1.34it/s]

buffer size = 15922, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7762/2000001 [1:19:51<432:52:55,  1.28it/s]

buffer size = 15924, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7763/2000001 [1:19:52<444:31:56,  1.24it/s]

buffer size = 15926, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7764/2000001 [1:19:52<452:09:02,  1.22it/s]

buffer size = 15928, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7765/2000001 [1:19:53<449:29:25,  1.23it/s]

buffer size = 15930, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7766/2000001 [1:19:54<440:06:03,  1.26it/s]

buffer size = 15932, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7767/2000001 [1:19:55<434:33:08,  1.27it/s]

buffer size = 15934, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7768/2000001 [1:19:56<424:47:49,  1.30it/s]

buffer size = 15936, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7769/2000001 [1:19:56<411:59:09,  1.34it/s]

buffer size = 15938, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7770/2000001 [1:19:57<405:43:12,  1.36it/s]

buffer size = 15940, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7771/2000001 [1:19:58<396:23:45,  1.40it/s]

buffer size = 15942, epsilon = 0.09612
mean_reward :  0.0


  0%|          | 7772/2000001 [1:19:58<392:19:13,  1.41it/s]

buffer size = 15944, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7773/2000001 [1:19:59<394:30:05,  1.40it/s]

buffer size = 15946, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7774/2000001 [1:20:00<389:01:31,  1.42it/s]

buffer size = 15948, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7775/2000001 [1:20:00<393:41:36,  1.41it/s]

buffer size = 15950, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7776/2000001 [1:20:01<389:49:18,  1.42it/s]

buffer size = 15952, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7777/2000001 [1:20:02<386:16:13,  1.43it/s]

buffer size = 15954, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7778/2000001 [1:20:03<388:07:59,  1.43it/s]

buffer size = 15956, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7779/2000001 [1:20:03<405:01:59,  1.37it/s]

buffer size = 15958, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7780/2000001 [1:20:04<422:56:08,  1.31it/s]

buffer size = 15960, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7781/2000001 [1:20:05<429:24:50,  1.29it/s]

buffer size = 15962, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7782/2000001 [1:20:06<434:00:00,  1.28it/s]

buffer size = 15964, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7783/2000001 [1:20:07<441:29:29,  1.25it/s]

buffer size = 15966, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7784/2000001 [1:20:07<436:37:35,  1.27it/s]

buffer size = 15968, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7785/2000001 [1:20:08<429:23:32,  1.29it/s]

buffer size = 15970, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7786/2000001 [1:20:09<427:29:09,  1.29it/s]

buffer size = 15972, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7787/2000001 [1:20:10<428:25:30,  1.29it/s]

buffer size = 15974, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7788/2000001 [1:20:10<425:08:35,  1.30it/s]

buffer size = 15976, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7789/2000001 [1:20:11<417:15:24,  1.33it/s]

buffer size = 15978, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7790/2000001 [1:20:12<405:55:24,  1.36it/s]

buffer size = 15980, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7791/2000001 [1:20:12<398:44:44,  1.39it/s]

buffer size = 15982, epsilon = 0.09611
mean_reward :  0.0


  0%|          | 7792/2000001 [1:20:13<391:39:28,  1.41it/s]

buffer size = 15984, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7793/2000001 [1:20:14<387:56:59,  1.43it/s]

buffer size = 15986, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7794/2000001 [1:20:15<383:05:07,  1.44it/s]

buffer size = 15988, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7795/2000001 [1:20:15<384:54:29,  1.44it/s]

buffer size = 15990, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7796/2000001 [1:20:16<385:13:38,  1.44it/s]

buffer size = 15992, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7797/2000001 [1:20:17<381:34:06,  1.45it/s]

buffer size = 15994, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7798/2000001 [1:20:17<403:20:23,  1.37it/s]

buffer size = 15996, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7799/2000001 [1:20:18<416:32:20,  1.33it/s]

buffer size = 15998, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7800/2000001 [1:20:19<419:12:06,  1.32it/s]

buffer size = 16000, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7801/2000001 [1:20:20<451:52:54,  1.22it/s]

buffer size = 16002, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7802/2000001 [1:20:21<461:41:09,  1.20it/s]

buffer size = 16004, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7803/2000001 [1:20:22<447:00:23,  1.24it/s]

buffer size = 16006, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7804/2000001 [1:20:22<437:00:33,  1.27it/s]

buffer size = 16008, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7805/2000001 [1:20:23<424:32:07,  1.30it/s]

buffer size = 16010, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7806/2000001 [1:20:24<411:02:51,  1.35it/s]

buffer size = 16012, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7807/2000001 [1:20:24<401:04:12,  1.38it/s]

buffer size = 16014, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7808/2000001 [1:20:25<394:57:51,  1.40it/s]

buffer size = 16016, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7809/2000001 [1:20:26<394:18:51,  1.40it/s]

buffer size = 16018, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7810/2000001 [1:20:26<388:35:59,  1.42it/s]

buffer size = 16020, epsilon = 0.09610
mean_reward :  0.0


  0%|          | 7811/2000001 [1:20:27<386:21:36,  1.43it/s]

buffer size = 16022, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7812/2000001 [1:20:28<386:09:43,  1.43it/s]

buffer size = 16024, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7813/2000001 [1:20:29<389:09:45,  1.42it/s]

buffer size = 16026, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7814/2000001 [1:20:29<386:24:51,  1.43it/s]

buffer size = 16028, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7815/2000001 [1:20:30<387:29:44,  1.43it/s]

buffer size = 16030, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7816/2000001 [1:20:31<400:50:57,  1.38it/s]

buffer size = 16032, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7817/2000001 [1:20:32<423:14:09,  1.31it/s]

buffer size = 16034, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7818/2000001 [1:20:32<431:02:20,  1.28it/s]

buffer size = 16036, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7819/2000001 [1:20:33<443:10:18,  1.25it/s]

buffer size = 16038, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7820/2000001 [1:20:34<452:42:25,  1.22it/s]

buffer size = 16040, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7821/2000001 [1:20:35<450:17:45,  1.23it/s]

buffer size = 16042, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7822/2000001 [1:20:36<444:37:38,  1.24it/s]

buffer size = 16044, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7823/2000001 [1:20:36<438:10:13,  1.26it/s]

buffer size = 16046, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7824/2000001 [1:20:37<428:59:55,  1.29it/s]

buffer size = 16048, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7825/2000001 [1:20:38<413:12:48,  1.34it/s]

buffer size = 16050, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7826/2000001 [1:20:39<410:09:35,  1.35it/s]

buffer size = 16052, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7827/2000001 [1:20:39<407:31:45,  1.36it/s]

buffer size = 16054, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7828/2000001 [1:20:40<397:03:47,  1.39it/s]

buffer size = 16056, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7829/2000001 [1:20:41<393:48:46,  1.41it/s]

buffer size = 16058, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7830/2000001 [1:20:41<392:40:40,  1.41it/s]

buffer size = 16060, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7831/2000001 [1:20:42<389:58:44,  1.42it/s]

buffer size = 16062, epsilon = 0.09609
mean_reward :  0.0


  0%|          | 7832/2000001 [1:20:43<390:05:36,  1.42it/s]

buffer size = 16064, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7833/2000001 [1:20:44<392:03:09,  1.41it/s]

buffer size = 16066, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7834/2000001 [1:20:44<388:19:51,  1.43it/s]

buffer size = 16068, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7835/2000001 [1:20:45<409:09:02,  1.35it/s]

buffer size = 16070, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7836/2000001 [1:20:46<444:57:06,  1.24it/s]

buffer size = 16072, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7837/2000001 [1:20:47<474:28:41,  1.17it/s]

buffer size = 16074, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7838/2000001 [1:20:48<477:50:45,  1.16it/s]

buffer size = 16076, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7839/2000001 [1:20:49<459:33:05,  1.20it/s]

buffer size = 16078, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7840/2000001 [1:20:49<440:52:59,  1.26it/s]

buffer size = 16080, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7841/2000001 [1:20:50<432:36:47,  1.28it/s]

buffer size = 16082, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7842/2000001 [1:20:51<416:49:11,  1.33it/s]

buffer size = 16084, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7843/2000001 [1:20:51<408:29:32,  1.35it/s]

buffer size = 16086, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7844/2000001 [1:20:52<401:13:30,  1.38it/s]

buffer size = 16088, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7845/2000001 [1:20:53<397:46:37,  1.39it/s]

buffer size = 16090, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7846/2000001 [1:20:54<392:38:39,  1.41it/s]

buffer size = 16092, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7847/2000001 [1:20:54<390:57:15,  1.42it/s]

buffer size = 16094, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7848/2000001 [1:20:55<391:01:08,  1.42it/s]

buffer size = 16096, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7849/2000001 [1:20:56<392:15:50,  1.41it/s]

buffer size = 16098, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7850/2000001 [1:20:56<388:11:35,  1.43it/s]

buffer size = 16100, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7851/2000001 [1:20:57<383:20:22,  1.44it/s]

buffer size = 16102, epsilon = 0.09608
mean_reward :  0.0


  0%|          | 7852/2000001 [1:20:58<395:43:45,  1.40it/s]

buffer size = 16104, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7853/2000001 [1:20:59<411:37:26,  1.34it/s]

buffer size = 16106, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7854/2000001 [1:20:59<425:43:09,  1.30it/s]

buffer size = 16108, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7855/2000001 [1:21:00<438:57:02,  1.26it/s]

buffer size = 16110, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7856/2000001 [1:21:01<447:39:48,  1.24it/s]

buffer size = 16112, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7857/2000001 [1:21:02<446:04:51,  1.24it/s]

buffer size = 16114, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7858/2000001 [1:21:03<438:20:19,  1.26it/s]

buffer size = 16116, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7859/2000001 [1:21:03<431:39:33,  1.28it/s]

buffer size = 16118, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7860/2000001 [1:21:04<428:06:02,  1.29it/s]

buffer size = 16120, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7861/2000001 [1:21:05<429:34:51,  1.29it/s]

buffer size = 16122, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7862/2000001 [1:21:06<417:24:32,  1.33it/s]

buffer size = 16124, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7863/2000001 [1:21:06<411:11:02,  1.35it/s]

buffer size = 16126, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7864/2000001 [1:21:07<403:22:46,  1.37it/s]

buffer size = 16128, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7865/2000001 [1:21:08<400:49:50,  1.38it/s]

buffer size = 16130, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7866/2000001 [1:21:09<395:03:42,  1.40it/s]

buffer size = 16132, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7867/2000001 [1:21:09<392:18:24,  1.41it/s]

buffer size = 16134, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7868/2000001 [1:21:10<388:29:15,  1.42it/s]

buffer size = 16136, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7869/2000001 [1:21:11<393:26:45,  1.41it/s]

buffer size = 16138, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7870/2000001 [1:21:11<391:35:16,  1.41it/s]

buffer size = 16140, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7871/2000001 [1:21:12<404:46:30,  1.37it/s]

buffer size = 16142, epsilon = 0.09607
mean_reward :  0.0


  0%|          | 7872/2000001 [1:21:13<423:56:17,  1.31it/s]

buffer size = 16144, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7873/2000001 [1:21:14<445:20:34,  1.24it/s]

buffer size = 16146, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7874/2000001 [1:21:15<466:34:39,  1.19it/s]

buffer size = 16148, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7875/2000001 [1:21:16<459:28:39,  1.20it/s]

buffer size = 16150, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7876/2000001 [1:21:16<443:07:58,  1.25it/s]

buffer size = 16152, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7877/2000001 [1:21:17<437:40:24,  1.26it/s]

buffer size = 16154, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7878/2000001 [1:21:18<426:08:02,  1.30it/s]

buffer size = 16156, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7879/2000001 [1:21:18<409:28:27,  1.35it/s]

buffer size = 16158, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7880/2000001 [1:21:19<401:45:25,  1.38it/s]

buffer size = 16160, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7881/2000001 [1:21:20<399:34:57,  1.38it/s]

buffer size = 16162, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7882/2000001 [1:21:21<394:04:43,  1.40it/s]

buffer size = 16164, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7883/2000001 [1:21:21<394:29:55,  1.40it/s]

buffer size = 16166, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7884/2000001 [1:21:22<396:55:07,  1.39it/s]

buffer size = 16168, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7885/2000001 [1:21:23<390:49:19,  1.42it/s]

buffer size = 16170, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7886/2000001 [1:21:23<389:59:04,  1.42it/s]

buffer size = 16172, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7887/2000001 [1:21:24<387:02:55,  1.43it/s]

buffer size = 16174, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7888/2000001 [1:21:25<382:36:06,  1.45it/s]

buffer size = 16176, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7889/2000001 [1:21:26<402:59:57,  1.37it/s]

buffer size = 16178, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7890/2000001 [1:21:26<413:46:28,  1.34it/s]

buffer size = 16180, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7891/2000001 [1:21:27<423:11:16,  1.31it/s]

buffer size = 16182, epsilon = 0.09606
mean_reward :  0.0


  0%|          | 7892/2000001 [1:21:28<434:45:58,  1.27it/s]

buffer size = 16184, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7893/2000001 [1:21:29<456:22:12,  1.21it/s]

buffer size = 16186, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7894/2000001 [1:21:30<445:06:52,  1.24it/s]

buffer size = 16188, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7895/2000001 [1:21:30<440:20:48,  1.26it/s]

buffer size = 16190, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7896/2000001 [1:21:31<440:09:49,  1.26it/s]

buffer size = 16192, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7897/2000001 [1:21:32<429:50:18,  1.29it/s]

buffer size = 16194, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7898/2000001 [1:21:33<416:14:06,  1.33it/s]

buffer size = 16196, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7899/2000001 [1:21:33<405:07:46,  1.37it/s]

buffer size = 16198, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7900/2000001 [1:21:34<399:57:18,  1.38it/s]

buffer size = 16200, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7901/2000001 [1:21:35<392:44:40,  1.41it/s]

buffer size = 16202, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7902/2000001 [1:21:35<383:15:08,  1.44it/s]

buffer size = 16204, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7903/2000001 [1:21:36<384:04:19,  1.44it/s]

buffer size = 16206, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7904/2000001 [1:21:37<389:19:15,  1.42it/s]

buffer size = 16208, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7905/2000001 [1:21:37<382:39:49,  1.45it/s]

buffer size = 16210, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7906/2000001 [1:21:38<385:12:21,  1.44it/s]

buffer size = 16212, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7907/2000001 [1:21:39<390:44:00,  1.42it/s]

buffer size = 16214, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7908/2000001 [1:21:40<437:15:49,  1.27it/s]

buffer size = 16216, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7909/2000001 [1:21:41<442:52:55,  1.25it/s]

buffer size = 16218, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7910/2000001 [1:21:42<448:31:19,  1.23it/s]

buffer size = 16220, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7911/2000001 [1:21:42<461:36:33,  1.20it/s]

buffer size = 16222, epsilon = 0.09605
mean_reward :  0.0


  0%|          | 7912/2000001 [1:21:43<451:20:48,  1.23it/s]

buffer size = 16224, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7913/2000001 [1:21:44<449:04:16,  1.23it/s]

buffer size = 16226, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7914/2000001 [1:21:45<438:06:12,  1.26it/s]

buffer size = 16228, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7915/2000001 [1:21:46<432:34:36,  1.28it/s]

buffer size = 16230, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7916/2000001 [1:21:46<417:58:31,  1.32it/s]

buffer size = 16232, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7917/2000001 [1:21:47<408:46:32,  1.35it/s]

buffer size = 16234, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7918/2000001 [1:21:48<407:23:53,  1.36it/s]

buffer size = 16236, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7919/2000001 [1:21:48<406:00:14,  1.36it/s]

buffer size = 16238, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7920/2000001 [1:21:49<400:11:03,  1.38it/s]

buffer size = 16240, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7921/2000001 [1:21:50<398:30:36,  1.39it/s]

buffer size = 16242, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7922/2000001 [1:21:51<397:02:54,  1.39it/s]

buffer size = 16244, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7923/2000001 [1:21:51<392:14:20,  1.41it/s]

buffer size = 16246, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7924/2000001 [1:21:52<393:46:44,  1.41it/s]

buffer size = 16248, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7925/2000001 [1:21:53<393:57:38,  1.40it/s]

buffer size = 16250, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7926/2000001 [1:21:53<411:46:47,  1.34it/s]

buffer size = 16252, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7927/2000001 [1:21:54<425:30:57,  1.30it/s]

buffer size = 16254, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7928/2000001 [1:21:55<440:35:43,  1.26it/s]

buffer size = 16256, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7929/2000001 [1:21:56<460:53:08,  1.20it/s]

buffer size = 16258, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7930/2000001 [1:21:57<447:13:52,  1.24it/s]

buffer size = 16260, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7931/2000001 [1:21:58<439:01:45,  1.26it/s]

buffer size = 16262, epsilon = 0.09604
mean_reward :  0.0


  0%|          | 7932/2000001 [1:21:58<434:00:09,  1.27it/s]

buffer size = 16264, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7933/2000001 [1:21:59<424:05:25,  1.30it/s]

buffer size = 16266, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7934/2000001 [1:22:00<409:11:28,  1.35it/s]

buffer size = 16268, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7935/2000001 [1:22:00<400:55:05,  1.38it/s]

buffer size = 16270, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7936/2000001 [1:22:01<396:40:16,  1.39it/s]

buffer size = 16272, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7937/2000001 [1:22:02<398:33:08,  1.39it/s]

buffer size = 16274, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7938/2000001 [1:22:03<396:20:41,  1.40it/s]

buffer size = 16276, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7939/2000001 [1:22:03<391:33:29,  1.41it/s]

buffer size = 16278, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7940/2000001 [1:22:04<386:40:25,  1.43it/s]

buffer size = 16280, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7941/2000001 [1:22:05<386:44:24,  1.43it/s]

buffer size = 16282, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7942/2000001 [1:22:05<384:40:28,  1.44it/s]

buffer size = 16284, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7943/2000001 [1:22:06<384:59:04,  1.44it/s]

buffer size = 16286, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7944/2000001 [1:22:07<432:33:20,  1.28it/s]

buffer size = 16288, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7945/2000001 [1:22:08<455:37:43,  1.21it/s]

buffer size = 16290, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7946/2000001 [1:22:09<469:43:39,  1.18it/s]

buffer size = 16292, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7947/2000001 [1:22:10<481:39:21,  1.15it/s]

buffer size = 16294, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7948/2000001 [1:22:10<458:08:41,  1.21it/s]

buffer size = 16296, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7949/2000001 [1:22:11<437:30:52,  1.26it/s]

buffer size = 16298, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7950/2000001 [1:22:12<424:31:10,  1.30it/s]

buffer size = 16300, epsilon = 0.09603
mean_reward :  0.0


  0%|          | 7951/2000001 [1:22:13<410:39:37,  1.35it/s]

buffer size = 16302, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7952/2000001 [1:22:13<405:23:48,  1.36it/s]

buffer size = 16304, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7953/2000001 [1:22:14<404:21:50,  1.37it/s]

buffer size = 16306, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7954/2000001 [1:22:15<394:30:13,  1.40it/s]

buffer size = 16308, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7955/2000001 [1:22:15<391:35:08,  1.41it/s]

buffer size = 16310, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7956/2000001 [1:22:16<389:10:48,  1.42it/s]

buffer size = 16312, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7957/2000001 [1:22:17<385:15:58,  1.44it/s]

buffer size = 16314, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7958/2000001 [1:22:17<385:12:16,  1.44it/s]

buffer size = 16316, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7959/2000001 [1:22:18<390:16:31,  1.42it/s]

buffer size = 16318, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7960/2000001 [1:22:19<386:37:29,  1.43it/s]

buffer size = 16320, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7961/2000001 [1:22:20<393:23:31,  1.41it/s]

buffer size = 16322, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7962/2000001 [1:22:20<412:57:51,  1.34it/s]

buffer size = 16324, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7963/2000001 [1:22:21<433:46:44,  1.28it/s]

buffer size = 16326, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7964/2000001 [1:22:22<431:09:23,  1.28it/s]

buffer size = 16328, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7965/2000001 [1:22:23<441:43:03,  1.25it/s]

buffer size = 16330, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7966/2000001 [1:22:24<446:08:09,  1.24it/s]

buffer size = 16332, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7967/2000001 [1:22:25<443:41:53,  1.25it/s]

buffer size = 16334, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7968/2000001 [1:22:25<436:57:33,  1.27it/s]

buffer size = 16336, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7969/2000001 [1:22:26<433:03:30,  1.28it/s]

buffer size = 16338, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7970/2000001 [1:22:27<426:28:55,  1.30it/s]

buffer size = 16340, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7971/2000001 [1:22:27<413:05:23,  1.34it/s]

buffer size = 16342, epsilon = 0.09602
mean_reward :  0.0


  0%|          | 7972/2000001 [1:22:28<405:24:57,  1.36it/s]

buffer size = 16344, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7973/2000001 [1:22:29<405:26:09,  1.36it/s]

buffer size = 16346, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7974/2000001 [1:22:30<396:41:24,  1.39it/s]

buffer size = 16348, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7975/2000001 [1:22:30<394:02:45,  1.40it/s]

buffer size = 16350, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7976/2000001 [1:22:31<391:47:57,  1.41it/s]

buffer size = 16352, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7977/2000001 [1:22:32<395:38:20,  1.40it/s]

buffer size = 16354, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7978/2000001 [1:22:32<394:32:20,  1.40it/s]

buffer size = 16356, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7979/2000001 [1:22:33<392:24:40,  1.41it/s]

buffer size = 16358, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7980/2000001 [1:22:34<399:32:49,  1.38it/s]

buffer size = 16360, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7981/2000001 [1:22:35<422:12:09,  1.31it/s]

buffer size = 16362, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7982/2000001 [1:22:36<424:23:38,  1.30it/s]

buffer size = 16364, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7983/2000001 [1:22:36<440:05:44,  1.26it/s]

buffer size = 16366, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7984/2000001 [1:22:37<453:23:46,  1.22it/s]

buffer size = 16368, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7985/2000001 [1:22:38<446:33:29,  1.24it/s]

buffer size = 16370, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7986/2000001 [1:22:39<434:56:18,  1.27it/s]

buffer size = 16372, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7987/2000001 [1:22:40<429:59:46,  1.29it/s]

buffer size = 16374, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7988/2000001 [1:22:40<429:19:00,  1.29it/s]

buffer size = 16376, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7989/2000001 [1:22:41<425:40:58,  1.30it/s]

buffer size = 16378, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7990/2000001 [1:22:42<417:05:53,  1.33it/s]

buffer size = 16380, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7991/2000001 [1:22:42<406:09:06,  1.36it/s]

buffer size = 16382, epsilon = 0.09601
mean_reward :  0.0


  0%|          | 7992/2000001 [1:22:43<406:11:33,  1.36it/s]

buffer size = 16384, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 7993/2000001 [1:22:44<401:39:59,  1.38it/s]

buffer size = 16386, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 7994/2000001 [1:22:45<399:54:32,  1.38it/s]

buffer size = 16388, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 7995/2000001 [1:22:45<393:25:56,  1.41it/s]

buffer size = 16390, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 7996/2000001 [1:22:46<389:25:57,  1.42it/s]

buffer size = 16392, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 7997/2000001 [1:22:47<387:51:08,  1.43it/s]

buffer size = 16394, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 7998/2000001 [1:22:47<387:26:11,  1.43it/s]

buffer size = 16396, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 7999/2000001 [1:22:48<410:24:04,  1.35it/s]

buffer size = 16398, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8000/2000001 [1:22:49<420:07:34,  1.32it/s]

buffer size = 16400, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8001/2000001 [1:22:50<425:01:38,  1.30it/s]

buffer size = 16402, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8002/2000001 [1:22:51<439:58:40,  1.26it/s]

buffer size = 16404, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8003/2000001 [1:22:52<453:37:30,  1.22it/s]

buffer size = 16406, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8004/2000001 [1:22:52<446:20:31,  1.24it/s]

buffer size = 16408, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8005/2000001 [1:22:53<439:26:57,  1.26it/s]

buffer size = 16410, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8006/2000001 [1:22:54<434:20:54,  1.27it/s]

buffer size = 16412, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8007/2000001 [1:22:55<435:16:31,  1.27it/s]

buffer size = 16414, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8008/2000001 [1:22:55<421:45:23,  1.31it/s]

buffer size = 16416, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8009/2000001 [1:22:56<414:44:50,  1.33it/s]

buffer size = 16418, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8010/2000001 [1:22:57<405:36:55,  1.36it/s]

buffer size = 16420, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8011/2000001 [1:22:57<399:27:30,  1.39it/s]

buffer size = 16422, epsilon = 0.09600
mean_reward :  0.0


  0%|          | 8012/2000001 [1:22:58<393:25:37,  1.41it/s]

buffer size = 16424, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8013/2000001 [1:22:59<391:57:59,  1.41it/s]

buffer size = 16426, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8014/2000001 [1:23:00<395:16:21,  1.40it/s]

buffer size = 16428, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8015/2000001 [1:23:00<392:17:21,  1.41it/s]

buffer size = 16430, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8016/2000001 [1:23:01<389:26:58,  1.42it/s]

buffer size = 16432, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8017/2000001 [1:23:02<400:33:41,  1.38it/s]

buffer size = 16434, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8018/2000001 [1:23:03<417:09:28,  1.33it/s]

buffer size = 16436, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8019/2000001 [1:23:03<420:07:13,  1.32it/s]

buffer size = 16438, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8020/2000001 [1:23:04<422:28:35,  1.31it/s]

buffer size = 16440, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8021/2000001 [1:23:05<439:34:08,  1.26it/s]

buffer size = 16442, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8022/2000001 [1:23:06<453:42:50,  1.22it/s]

buffer size = 16444, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8023/2000001 [1:23:07<456:24:59,  1.21it/s]

buffer size = 16446, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8024/2000001 [1:23:07<451:14:43,  1.23it/s]

buffer size = 16448, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8025/2000001 [1:23:08<442:08:19,  1.25it/s]

buffer size = 16450, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8026/2000001 [1:23:09<428:32:30,  1.29it/s]

buffer size = 16452, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8027/2000001 [1:23:10<417:45:30,  1.32it/s]

buffer size = 16454, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8028/2000001 [1:23:10<409:35:44,  1.35it/s]

buffer size = 16456, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8029/2000001 [1:23:11<400:11:18,  1.38it/s]

buffer size = 16458, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8030/2000001 [1:23:12<395:50:26,  1.40it/s]

buffer size = 16460, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8031/2000001 [1:23:12<396:50:59,  1.39it/s]

buffer size = 16462, epsilon = 0.09599
mean_reward :  0.0


  0%|          | 8032/2000001 [1:23:13<391:24:21,  1.41it/s]

buffer size = 16464, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8033/2000001 [1:23:14<392:08:45,  1.41it/s]

buffer size = 16466, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8034/2000001 [1:23:15<395:15:19,  1.40it/s]

buffer size = 16468, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8035/2000001 [1:23:15<390:58:30,  1.42it/s]

buffer size = 16470, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8036/2000001 [1:23:16<402:58:10,  1.37it/s]

buffer size = 16472, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8037/2000001 [1:23:17<438:14:42,  1.26it/s]

buffer size = 16474, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8038/2000001 [1:23:18<463:32:13,  1.19it/s]

buffer size = 16476, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8039/2000001 [1:23:19<472:21:52,  1.17it/s]

buffer size = 16478, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8040/2000001 [1:23:20<458:06:40,  1.21it/s]

buffer size = 16480, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8041/2000001 [1:23:20<443:14:51,  1.25it/s]

buffer size = 16482, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8042/2000001 [1:23:21<435:06:46,  1.27it/s]

buffer size = 16484, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8043/2000001 [1:23:22<424:50:29,  1.30it/s]

buffer size = 16486, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8044/2000001 [1:23:23<415:04:48,  1.33it/s]

buffer size = 16488, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8045/2000001 [1:23:23<405:15:04,  1.37it/s]

buffer size = 16490, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8046/2000001 [1:23:24<397:16:12,  1.39it/s]

buffer size = 16492, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8047/2000001 [1:23:25<389:50:18,  1.42it/s]

buffer size = 16494, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8048/2000001 [1:23:25<385:58:28,  1.43it/s]

buffer size = 16496, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8049/2000001 [1:23:26<387:14:15,  1.43it/s]

buffer size = 16498, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8050/2000001 [1:23:27<387:18:20,  1.43it/s]

buffer size = 16500, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8051/2000001 [1:23:27<386:15:43,  1.43it/s]

buffer size = 16502, epsilon = 0.09598
mean_reward :  0.0


  0%|          | 8052/2000001 [1:23:28<380:55:25,  1.45it/s]

buffer size = 16504, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8053/2000001 [1:23:29<387:58:45,  1.43it/s]

buffer size = 16506, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8054/2000001 [1:23:30<405:30:25,  1.36it/s]

buffer size = 16508, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8055/2000001 [1:23:30<417:11:04,  1.33it/s]

buffer size = 16510, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8056/2000001 [1:23:31<421:06:44,  1.31it/s]

buffer size = 16512, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8057/2000001 [1:23:32<432:02:21,  1.28it/s]

buffer size = 16514, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8058/2000001 [1:23:33<442:18:30,  1.25it/s]

buffer size = 16516, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8059/2000001 [1:23:34<432:21:43,  1.28it/s]

buffer size = 16518, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8060/2000001 [1:23:34<427:24:50,  1.29it/s]

buffer size = 16520, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8061/2000001 [1:23:35<429:47:03,  1.29it/s]

buffer size = 16522, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8062/2000001 [1:23:36<428:06:01,  1.29it/s]

buffer size = 16524, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8063/2000001 [1:23:37<425:56:16,  1.30it/s]

buffer size = 16526, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8064/2000001 [1:23:37<423:40:56,  1.31it/s]

buffer size = 16528, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8065/2000001 [1:23:38<418:28:12,  1.32it/s]

buffer size = 16530, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8066/2000001 [1:23:39<403:37:18,  1.37it/s]

buffer size = 16532, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8067/2000001 [1:23:39<402:15:51,  1.38it/s]

buffer size = 16534, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8068/2000001 [1:23:40<401:20:59,  1.38it/s]

buffer size = 16536, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8069/2000001 [1:23:41<396:24:11,  1.40it/s]

buffer size = 16538, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8070/2000001 [1:23:42<399:25:32,  1.39it/s]

buffer size = 16540, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8071/2000001 [1:23:42<397:11:37,  1.39it/s]

buffer size = 16542, epsilon = 0.09597
mean_reward :  0.0


  0%|          | 8072/2000001 [1:23:43<403:18:07,  1.37it/s]

buffer size = 16544, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8073/2000001 [1:23:44<420:37:34,  1.32it/s]

buffer size = 16546, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8074/2000001 [1:23:45<424:34:55,  1.30it/s]

buffer size = 16548, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8075/2000001 [1:23:46<437:17:13,  1.27it/s]

buffer size = 16550, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8076/2000001 [1:23:46<450:18:34,  1.23it/s]

buffer size = 16552, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8077/2000001 [1:23:47<440:19:33,  1.26it/s]

buffer size = 16554, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8078/2000001 [1:23:48<433:02:13,  1.28it/s]

buffer size = 16556, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8079/2000001 [1:23:49<429:57:30,  1.29it/s]

buffer size = 16558, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8080/2000001 [1:23:49<429:24:08,  1.29it/s]

buffer size = 16560, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8081/2000001 [1:23:50<427:55:20,  1.29it/s]

buffer size = 16562, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8082/2000001 [1:23:51<427:19:29,  1.29it/s]

buffer size = 16564, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8083/2000001 [1:23:52<415:22:09,  1.33it/s]

buffer size = 16566, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8084/2000001 [1:23:52<407:40:56,  1.36it/s]

buffer size = 16568, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8085/2000001 [1:23:53<402:25:24,  1.37it/s]

buffer size = 16570, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8086/2000001 [1:23:54<396:05:06,  1.40it/s]

buffer size = 16572, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8087/2000001 [1:23:55<404:55:08,  1.37it/s]

buffer size = 16574, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8088/2000001 [1:23:55<402:09:43,  1.38it/s]

buffer size = 16576, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8089/2000001 [1:23:56<395:37:50,  1.40it/s]

buffer size = 16578, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8090/2000001 [1:23:57<411:39:13,  1.34it/s]

buffer size = 16580, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8091/2000001 [1:23:58<422:04:09,  1.31it/s]

buffer size = 16582, epsilon = 0.09596
mean_reward :  0.0


  0%|          | 8092/2000001 [1:23:58<421:10:29,  1.31it/s]

buffer size = 16584, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8093/2000001 [1:23:59<429:10:57,  1.29it/s]

buffer size = 16586, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8094/2000001 [1:24:00<443:18:37,  1.25it/s]

buffer size = 16588, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8095/2000001 [1:24:01<440:00:57,  1.26it/s]

buffer size = 16590, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8096/2000001 [1:24:02<432:08:07,  1.28it/s]

buffer size = 16592, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8097/2000001 [1:24:02<429:14:12,  1.29it/s]

buffer size = 16594, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8098/2000001 [1:24:03<426:56:46,  1.30it/s]

buffer size = 16596, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8099/2000001 [1:24:04<423:43:29,  1.31it/s]

buffer size = 16598, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8100/2000001 [1:24:05<412:53:56,  1.34it/s]

buffer size = 16600, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8101/2000001 [1:24:05<402:25:29,  1.37it/s]

buffer size = 16602, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8102/2000001 [1:24:06<393:46:21,  1.41it/s]

buffer size = 16604, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8103/2000001 [1:24:07<392:53:59,  1.41it/s]

buffer size = 16606, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8104/2000001 [1:24:07<394:21:31,  1.40it/s]

buffer size = 16608, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8105/2000001 [1:24:08<390:30:42,  1.42it/s]

buffer size = 16610, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8106/2000001 [1:24:09<392:21:35,  1.41it/s]

buffer size = 16612, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8107/2000001 [1:24:09<392:02:25,  1.41it/s]

buffer size = 16614, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8108/2000001 [1:24:10<390:20:30,  1.42it/s]

buffer size = 16616, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8109/2000001 [1:24:11<404:06:11,  1.37it/s]

buffer size = 16618, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8110/2000001 [1:24:12<412:34:40,  1.34it/s]

buffer size = 16620, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8111/2000001 [1:24:12<417:47:16,  1.32it/s]

buffer size = 16622, epsilon = 0.09595
mean_reward :  0.0


  0%|          | 8112/2000001 [1:24:13<434:33:51,  1.27it/s]

buffer size = 16624, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8113/2000001 [1:24:14<451:15:43,  1.23it/s]

buffer size = 16626, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8114/2000001 [1:24:15<446:26:35,  1.24it/s]

buffer size = 16628, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8115/2000001 [1:24:16<443:47:10,  1.25it/s]

buffer size = 16630, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8116/2000001 [1:24:17<435:46:31,  1.27it/s]

buffer size = 16632, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8117/2000001 [1:24:17<429:03:59,  1.29it/s]

buffer size = 16634, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8118/2000001 [1:24:18<422:34:57,  1.31it/s]

buffer size = 16636, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8119/2000001 [1:24:19<412:16:24,  1.34it/s]

buffer size = 16638, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8120/2000001 [1:24:19<412:55:57,  1.34it/s]

buffer size = 16640, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8121/2000001 [1:24:20<402:30:42,  1.37it/s]

buffer size = 16642, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8122/2000001 [1:24:21<398:18:14,  1.39it/s]

buffer size = 16644, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8123/2000001 [1:24:22<395:40:42,  1.40it/s]

buffer size = 16646, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8124/2000001 [1:24:22<396:18:46,  1.40it/s]

buffer size = 16648, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8125/2000001 [1:24:23<399:20:11,  1.39it/s]

buffer size = 16650, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8126/2000001 [1:24:24<394:12:00,  1.40it/s]

buffer size = 16652, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8127/2000001 [1:24:25<415:56:23,  1.33it/s]

buffer size = 16654, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8128/2000001 [1:24:25<425:34:13,  1.30it/s]

buffer size = 16656, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8129/2000001 [1:24:26<424:52:47,  1.30it/s]

buffer size = 16658, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8130/2000001 [1:24:27<434:32:32,  1.27it/s]

buffer size = 16660, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8131/2000001 [1:24:28<443:35:39,  1.25it/s]

buffer size = 16662, epsilon = 0.09594
mean_reward :  0.0


  0%|          | 8132/2000001 [1:24:29<442:23:38,  1.25it/s]

buffer size = 16664, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8133/2000001 [1:24:29<433:09:52,  1.28it/s]

buffer size = 16666, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8134/2000001 [1:24:30<428:08:07,  1.29it/s]

buffer size = 16668, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8135/2000001 [1:24:31<427:03:49,  1.30it/s]

buffer size = 16670, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8136/2000001 [1:24:32<425:41:55,  1.30it/s]

buffer size = 16672, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8137/2000001 [1:24:32<426:30:18,  1.30it/s]

buffer size = 16674, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8138/2000001 [1:24:33<418:45:34,  1.32it/s]

buffer size = 16676, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8139/2000001 [1:24:34<414:05:07,  1.34it/s]

buffer size = 16678, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8140/2000001 [1:24:35<408:11:55,  1.36it/s]

buffer size = 16680, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8141/2000001 [1:24:35<400:43:06,  1.38it/s]

buffer size = 16682, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8142/2000001 [1:24:36<393:05:37,  1.41it/s]

buffer size = 16684, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8143/2000001 [1:24:37<392:05:07,  1.41it/s]

buffer size = 16686, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8144/2000001 [1:24:37<389:35:52,  1.42it/s]

buffer size = 16688, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8145/2000001 [1:24:38<384:24:58,  1.44it/s]

buffer size = 16690, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8146/2000001 [1:24:39<407:27:51,  1.36it/s]

buffer size = 16692, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8147/2000001 [1:24:40<419:10:42,  1.32it/s]

buffer size = 16694, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8148/2000001 [1:24:40<433:56:37,  1.28it/s]

buffer size = 16696, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8149/2000001 [1:24:41<451:15:47,  1.23it/s]

buffer size = 16698, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8150/2000001 [1:24:42<445:46:25,  1.24it/s]

buffer size = 16700, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8151/2000001 [1:24:43<437:07:58,  1.27it/s]

buffer size = 16702, epsilon = 0.09593
mean_reward :  0.0


  0%|          | 8152/2000001 [1:24:44<440:19:00,  1.26it/s]

buffer size = 16704, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8153/2000001 [1:24:44<429:19:15,  1.29it/s]

buffer size = 16706, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8154/2000001 [1:24:45<413:10:10,  1.34it/s]

buffer size = 16708, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8155/2000001 [1:24:46<404:17:13,  1.37it/s]

buffer size = 16710, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8156/2000001 [1:24:47<397:37:27,  1.39it/s]

buffer size = 16712, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8157/2000001 [1:24:47<392:19:47,  1.41it/s]

buffer size = 16714, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8158/2000001 [1:24:48<388:42:56,  1.42it/s]

buffer size = 16716, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8159/2000001 [1:24:49<388:32:41,  1.42it/s]

buffer size = 16718, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8160/2000001 [1:24:49<387:57:05,  1.43it/s]

buffer size = 16720, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8161/2000001 [1:24:50<390:03:20,  1.42it/s]

buffer size = 16722, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8162/2000001 [1:24:51<393:20:31,  1.41it/s]

buffer size = 16724, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8163/2000001 [1:24:51<399:37:16,  1.38it/s]

buffer size = 16726, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8164/2000001 [1:24:52<418:01:59,  1.32it/s]

buffer size = 16728, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8165/2000001 [1:24:53<435:26:03,  1.27it/s]

buffer size = 16730, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8166/2000001 [1:24:54<445:00:29,  1.24it/s]

buffer size = 16732, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8167/2000001 [1:24:55<463:49:26,  1.19it/s]

buffer size = 16734, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8168/2000001 [1:24:56<475:15:16,  1.16it/s]

buffer size = 16736, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8169/2000001 [1:24:57<460:14:14,  1.20it/s]

buffer size = 16738, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8170/2000001 [1:24:57<447:25:30,  1.24it/s]

buffer size = 16740, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8171/2000001 [1:24:58<438:16:11,  1.26it/s]

buffer size = 16742, epsilon = 0.09592
mean_reward :  0.0


  0%|          | 8172/2000001 [1:24:59<428:20:07,  1.29it/s]

buffer size = 16744, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8173/2000001 [1:25:00<419:13:30,  1.32it/s]

buffer size = 16746, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8174/2000001 [1:25:00<413:54:02,  1.34it/s]

buffer size = 16748, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8175/2000001 [1:25:01<403:21:38,  1.37it/s]

buffer size = 16750, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8176/2000001 [1:25:02<399:38:12,  1.38it/s]

buffer size = 16752, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8177/2000001 [1:25:02<404:09:33,  1.37it/s]

buffer size = 16754, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8178/2000001 [1:25:03<398:33:51,  1.39it/s]

buffer size = 16756, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8179/2000001 [1:25:04<397:20:24,  1.39it/s]

buffer size = 16758, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8180/2000001 [1:25:05<398:22:10,  1.39it/s]

buffer size = 16760, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8181/2000001 [1:25:05<395:15:50,  1.40it/s]

buffer size = 16762, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8182/2000001 [1:25:06<409:47:18,  1.35it/s]

buffer size = 16764, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8183/2000001 [1:25:07<436:12:09,  1.27it/s]

buffer size = 16766, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8184/2000001 [1:25:08<441:55:39,  1.25it/s]

buffer size = 16768, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8185/2000001 [1:25:09<455:23:17,  1.21it/s]

buffer size = 16770, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8186/2000001 [1:25:10<459:10:32,  1.20it/s]

buffer size = 16772, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8187/2000001 [1:25:10<449:33:38,  1.23it/s]

buffer size = 16774, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8188/2000001 [1:25:11<442:19:53,  1.25it/s]

buffer size = 16776, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8189/2000001 [1:25:12<433:53:57,  1.28it/s]

buffer size = 16778, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8190/2000001 [1:25:13<428:23:23,  1.29it/s]

buffer size = 16780, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8191/2000001 [1:25:13<418:27:34,  1.32it/s]

buffer size = 16782, epsilon = 0.09591
mean_reward :  0.0


  0%|          | 8192/2000001 [1:25:14<413:05:06,  1.34it/s]

buffer size = 16784, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8193/2000001 [1:25:15<408:44:11,  1.35it/s]

buffer size = 16786, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8194/2000001 [1:25:15<403:48:01,  1.37it/s]

buffer size = 16788, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8195/2000001 [1:25:16<400:06:49,  1.38it/s]

buffer size = 16790, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8196/2000001 [1:25:17<402:09:08,  1.38it/s]

buffer size = 16792, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8197/2000001 [1:25:18<398:07:32,  1.39it/s]

buffer size = 16794, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8198/2000001 [1:25:18<395:26:24,  1.40it/s]

buffer size = 16796, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8199/2000001 [1:25:19<395:25:37,  1.40it/s]

buffer size = 16798, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8200/2000001 [1:25:20<405:16:51,  1.37it/s]

buffer size = 16800, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8201/2000001 [1:25:21<420:44:40,  1.31it/s]

buffer size = 16802, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8202/2000001 [1:25:21<422:29:44,  1.31it/s]

buffer size = 16804, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8203/2000001 [1:25:22<432:26:58,  1.28it/s]

buffer size = 16806, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8204/2000001 [1:25:23<448:34:48,  1.23it/s]

buffer size = 16808, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8205/2000001 [1:25:24<447:11:47,  1.24it/s]

buffer size = 16810, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8206/2000001 [1:25:25<440:01:40,  1.26it/s]

buffer size = 16812, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8207/2000001 [1:25:25<430:15:44,  1.29it/s]

buffer size = 16814, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8208/2000001 [1:25:26<423:42:24,  1.31it/s]

buffer size = 16816, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8209/2000001 [1:25:27<419:17:46,  1.32it/s]

buffer size = 16818, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8210/2000001 [1:25:28<415:01:26,  1.33it/s]

buffer size = 16820, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8211/2000001 [1:25:28<417:06:48,  1.33it/s]

buffer size = 16822, epsilon = 0.09590
mean_reward :  0.0


  0%|          | 8212/2000001 [1:25:29<416:58:24,  1.33it/s]

buffer size = 16824, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8213/2000001 [1:25:30<405:15:36,  1.37it/s]

buffer size = 16826, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8214/2000001 [1:25:31<405:36:56,  1.36it/s]

buffer size = 16828, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8215/2000001 [1:25:31<400:13:11,  1.38it/s]

buffer size = 16830, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8216/2000001 [1:25:32<392:07:28,  1.41it/s]

buffer size = 16832, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8217/2000001 [1:25:33<394:28:52,  1.40it/s]

buffer size = 16834, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8218/2000001 [1:25:33<392:55:45,  1.41it/s]

buffer size = 16836, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8219/2000001 [1:25:34<411:25:37,  1.34it/s]

buffer size = 16838, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8220/2000001 [1:25:35<420:18:49,  1.32it/s]

buffer size = 16840, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8221/2000001 [1:25:36<422:58:03,  1.31it/s]

buffer size = 16842, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8222/2000001 [1:25:37<439:33:41,  1.26it/s]

buffer size = 16844, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8223/2000001 [1:25:37<448:02:08,  1.23it/s]

buffer size = 16846, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8224/2000001 [1:25:38<442:33:01,  1.25it/s]

buffer size = 16848, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8225/2000001 [1:25:39<439:56:21,  1.26it/s]

buffer size = 16850, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8226/2000001 [1:25:40<442:34:44,  1.25it/s]

buffer size = 16852, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8227/2000001 [1:25:41<438:34:13,  1.26it/s]

buffer size = 16854, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8228/2000001 [1:25:41<437:33:09,  1.26it/s]

buffer size = 16856, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8229/2000001 [1:25:42<431:20:14,  1.28it/s]

buffer size = 16858, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8230/2000001 [1:25:43<426:54:11,  1.30it/s]

buffer size = 16860, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8231/2000001 [1:25:44<415:36:08,  1.33it/s]

buffer size = 16862, epsilon = 0.09589
mean_reward :  0.0


  0%|          | 8232/2000001 [1:25:44<407:50:51,  1.36it/s]

buffer size = 16864, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8233/2000001 [1:25:45<402:55:29,  1.37it/s]

buffer size = 16866, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8234/2000001 [1:25:46<399:40:26,  1.38it/s]

buffer size = 16868, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8235/2000001 [1:25:46<401:26:00,  1.38it/s]

buffer size = 16870, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8236/2000001 [1:25:47<398:09:21,  1.39it/s]

buffer size = 16872, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8237/2000001 [1:25:48<409:30:00,  1.35it/s]

buffer size = 16874, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8238/2000001 [1:25:49<421:23:01,  1.31it/s]

buffer size = 16876, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8239/2000001 [1:25:50<430:33:05,  1.29it/s]

buffer size = 16878, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8240/2000001 [1:25:50<435:32:07,  1.27it/s]

buffer size = 16880, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8241/2000001 [1:25:51<446:23:49,  1.24it/s]

buffer size = 16882, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8242/2000001 [1:25:52<439:44:39,  1.26it/s]

buffer size = 16884, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8243/2000001 [1:25:53<432:07:45,  1.28it/s]

buffer size = 16886, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8244/2000001 [1:25:53<424:04:42,  1.30it/s]

buffer size = 16888, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8245/2000001 [1:25:54<419:30:59,  1.32it/s]

buffer size = 16890, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8246/2000001 [1:25:55<421:00:26,  1.31it/s]

buffer size = 16892, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8247/2000001 [1:25:56<418:50:09,  1.32it/s]

buffer size = 16894, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8248/2000001 [1:25:56<413:33:55,  1.34it/s]

buffer size = 16896, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8249/2000001 [1:25:57<414:12:33,  1.34it/s]

buffer size = 16898, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8250/2000001 [1:25:58<413:32:46,  1.34it/s]

buffer size = 16900, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8251/2000001 [1:25:59<409:01:58,  1.35it/s]

buffer size = 16902, epsilon = 0.09588
mean_reward :  0.0


  0%|          | 8252/2000001 [1:25:59<404:58:17,  1.37it/s]

buffer size = 16904, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8253/2000001 [1:26:00<401:11:04,  1.38it/s]

buffer size = 16906, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8254/2000001 [1:26:01<401:23:37,  1.38it/s]

buffer size = 16908, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8255/2000001 [1:26:02<406:53:13,  1.36it/s]

buffer size = 16910, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8256/2000001 [1:26:02<433:09:13,  1.28it/s]

buffer size = 16912, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8257/2000001 [1:26:03<447:54:58,  1.24it/s]

buffer size = 16914, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8258/2000001 [1:26:04<476:32:34,  1.16it/s]

buffer size = 16916, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8259/2000001 [1:26:05<461:00:45,  1.20it/s]

buffer size = 16918, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8260/2000001 [1:26:06<450:42:59,  1.23it/s]

buffer size = 16920, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8261/2000001 [1:26:07<444:22:45,  1.25it/s]

buffer size = 16922, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8262/2000001 [1:26:07<439:07:06,  1.26it/s]

buffer size = 16924, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8263/2000001 [1:26:08<431:49:19,  1.28it/s]

buffer size = 16926, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8264/2000001 [1:26:09<425:44:56,  1.30it/s]

buffer size = 16928, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8265/2000001 [1:26:10<422:01:47,  1.31it/s]

buffer size = 16930, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8266/2000001 [1:26:10<414:47:07,  1.33it/s]

buffer size = 16932, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8267/2000001 [1:26:11<408:11:14,  1.36it/s]

buffer size = 16934, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8268/2000001 [1:26:12<404:15:14,  1.37it/s]

buffer size = 16936, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8269/2000001 [1:26:12<400:14:22,  1.38it/s]

buffer size = 16938, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8270/2000001 [1:26:13<398:13:40,  1.39it/s]

buffer size = 16940, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8271/2000001 [1:26:14<397:36:19,  1.39it/s]

buffer size = 16942, epsilon = 0.09587
mean_reward :  0.0


  0%|          | 8272/2000001 [1:26:15<402:44:28,  1.37it/s]

buffer size = 16944, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8273/2000001 [1:26:15<418:14:04,  1.32it/s]

buffer size = 16946, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8274/2000001 [1:26:16<421:13:25,  1.31it/s]

buffer size = 16948, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8275/2000001 [1:26:17<426:59:13,  1.30it/s]

buffer size = 16950, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8276/2000001 [1:26:18<432:36:11,  1.28it/s]

buffer size = 16952, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8277/2000001 [1:26:19<444:34:32,  1.24it/s]

buffer size = 16954, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8278/2000001 [1:26:19<441:02:12,  1.25it/s]

buffer size = 16956, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8279/2000001 [1:26:20<439:40:03,  1.26it/s]

buffer size = 16958, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8280/2000001 [1:26:21<430:41:32,  1.28it/s]

buffer size = 16960, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8281/2000001 [1:26:22<427:53:14,  1.29it/s]

buffer size = 16962, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8282/2000001 [1:26:23<423:36:54,  1.31it/s]

buffer size = 16964, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8283/2000001 [1:26:23<416:23:33,  1.33it/s]

buffer size = 16966, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8284/2000001 [1:26:24<416:08:15,  1.33it/s]

buffer size = 16968, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8285/2000001 [1:26:25<416:06:31,  1.33it/s]

buffer size = 16970, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8286/2000001 [1:26:25<413:04:50,  1.34it/s]

buffer size = 16972, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8287/2000001 [1:26:26<411:24:34,  1.34it/s]

buffer size = 16974, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8288/2000001 [1:26:27<404:24:08,  1.37it/s]

buffer size = 16976, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8289/2000001 [1:26:28<398:51:32,  1.39it/s]

buffer size = 16978, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8290/2000001 [1:26:28<394:53:25,  1.40it/s]

buffer size = 16980, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8291/2000001 [1:26:29<410:59:29,  1.35it/s]

buffer size = 16982, epsilon = 0.09586
mean_reward :  0.0


  0%|          | 8292/2000001 [1:26:30<420:20:37,  1.32it/s]

buffer size = 16984, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8293/2000001 [1:26:31<426:17:21,  1.30it/s]

buffer size = 16986, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8294/2000001 [1:26:32<441:06:55,  1.25it/s]

buffer size = 16988, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8295/2000001 [1:26:32<450:32:35,  1.23it/s]

buffer size = 16990, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8296/2000001 [1:26:33<443:04:27,  1.25it/s]

buffer size = 16992, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8297/2000001 [1:26:34<438:06:56,  1.26it/s]

buffer size = 16994, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8298/2000001 [1:26:35<435:11:06,  1.27it/s]

buffer size = 16996, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8299/2000001 [1:26:36<427:24:51,  1.29it/s]

buffer size = 16998, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8300/2000001 [1:26:36<426:22:08,  1.30it/s]

buffer size = 17000, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8301/2000001 [1:26:37<422:53:01,  1.31it/s]

buffer size = 17002, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8302/2000001 [1:26:38<418:28:34,  1.32it/s]

buffer size = 17004, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8303/2000001 [1:26:39<418:14:48,  1.32it/s]

buffer size = 17006, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8304/2000001 [1:26:39<413:45:20,  1.34it/s]

buffer size = 17008, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8305/2000001 [1:26:40<414:23:15,  1.34it/s]

buffer size = 17010, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8306/2000001 [1:26:41<411:52:54,  1.34it/s]

buffer size = 17012, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8307/2000001 [1:26:41<405:09:53,  1.37it/s]

buffer size = 17014, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8308/2000001 [1:26:42<400:56:14,  1.38it/s]

buffer size = 17016, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8309/2000001 [1:26:43<410:17:14,  1.35it/s]

buffer size = 17018, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8310/2000001 [1:26:44<431:08:26,  1.28it/s]

buffer size = 17020, epsilon = 0.09585
mean_reward :  0.0


  0%|          | 8311/2000001 [1:26:45<426:29:25,  1.30it/s]

buffer size = 17022, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8312/2000001 [1:26:45<435:52:15,  1.27it/s]

buffer size = 17024, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8313/2000001 [1:26:46<442:34:06,  1.25it/s]

buffer size = 17026, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8314/2000001 [1:26:47<434:09:11,  1.27it/s]

buffer size = 17028, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8315/2000001 [1:26:48<425:52:20,  1.30it/s]

buffer size = 17030, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8316/2000001 [1:26:48<422:10:00,  1.31it/s]

buffer size = 17032, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8317/2000001 [1:26:49<420:55:08,  1.31it/s]

buffer size = 17034, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8318/2000001 [1:26:50<419:56:27,  1.32it/s]

buffer size = 17036, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8319/2000001 [1:26:51<421:03:58,  1.31it/s]

buffer size = 17038, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8320/2000001 [1:26:51<422:28:24,  1.31it/s]

buffer size = 17040, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8321/2000001 [1:26:52<422:36:50,  1.31it/s]

buffer size = 17042, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8322/2000001 [1:26:53<419:35:31,  1.32it/s]

buffer size = 17044, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8323/2000001 [1:26:54<420:06:58,  1.32it/s]

buffer size = 17046, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8324/2000001 [1:26:55<422:22:59,  1.31it/s]

buffer size = 17048, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8325/2000001 [1:26:55<420:44:49,  1.31it/s]

buffer size = 17050, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8326/2000001 [1:26:56<413:03:58,  1.34it/s]

buffer size = 17052, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8327/2000001 [1:26:57<428:47:44,  1.29it/s]

buffer size = 17054, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8328/2000001 [1:26:58<431:32:13,  1.28it/s]

buffer size = 17056, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8329/2000001 [1:26:59<454:37:18,  1.22it/s]

buffer size = 17058, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8330/2000001 [1:26:59<465:55:28,  1.19it/s]

buffer size = 17060, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8331/2000001 [1:27:00<461:32:23,  1.20it/s]

buffer size = 17062, epsilon = 0.09584
mean_reward :  0.0


  0%|          | 8332/2000001 [1:27:01<446:24:58,  1.24it/s]

buffer size = 17064, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8333/2000001 [1:27:02<434:21:35,  1.27it/s]

buffer size = 17066, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8334/2000001 [1:27:02<429:53:32,  1.29it/s]

buffer size = 17068, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8335/2000001 [1:27:03<423:25:52,  1.31it/s]

buffer size = 17070, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8336/2000001 [1:27:04<417:18:48,  1.33it/s]

buffer size = 17072, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8337/2000001 [1:27:05<417:37:22,  1.32it/s]

buffer size = 17074, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8338/2000001 [1:27:05<416:30:07,  1.33it/s]

buffer size = 17076, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8339/2000001 [1:27:06<415:55:34,  1.33it/s]

buffer size = 17078, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8340/2000001 [1:27:07<416:37:08,  1.33it/s]

buffer size = 17080, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8341/2000001 [1:27:08<414:39:45,  1.33it/s]

buffer size = 17082, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8342/2000001 [1:27:08<408:17:39,  1.35it/s]

buffer size = 17084, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8343/2000001 [1:27:09<406:21:18,  1.36it/s]

buffer size = 17086, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8344/2000001 [1:27:10<410:31:44,  1.35it/s]

buffer size = 17088, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8345/2000001 [1:27:11<418:53:14,  1.32it/s]

buffer size = 17090, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8346/2000001 [1:27:11<421:10:22,  1.31it/s]

buffer size = 17092, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8347/2000001 [1:27:12<423:23:32,  1.31it/s]

buffer size = 17094, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8348/2000001 [1:27:13<437:36:54,  1.26it/s]

buffer size = 17096, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8349/2000001 [1:27:14<440:50:26,  1.25it/s]

buffer size = 17098, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8350/2000001 [1:27:15<428:35:28,  1.29it/s]

buffer size = 17100, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8351/2000001 [1:27:15<424:11:12,  1.30it/s]

buffer size = 17102, epsilon = 0.09583
mean_reward :  0.0


  0%|          | 8352/2000001 [1:27:16<418:20:04,  1.32it/s]

buffer size = 17104, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8353/2000001 [1:27:17<414:09:00,  1.34it/s]

buffer size = 17106, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8354/2000001 [1:27:18<411:52:25,  1.34it/s]

buffer size = 17108, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8355/2000001 [1:27:18<409:53:06,  1.35it/s]

buffer size = 17110, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8356/2000001 [1:27:19<409:19:11,  1.35it/s]

buffer size = 17112, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8357/2000001 [1:27:20<410:48:32,  1.35it/s]

buffer size = 17114, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8358/2000001 [1:27:21<410:35:22,  1.35it/s]

buffer size = 17116, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8359/2000001 [1:27:21<409:39:59,  1.35it/s]

buffer size = 17118, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8360/2000001 [1:27:22<410:23:03,  1.35it/s]

buffer size = 17120, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8361/2000001 [1:27:23<409:12:12,  1.35it/s]

buffer size = 17122, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8362/2000001 [1:27:23<413:50:23,  1.34it/s]

buffer size = 17124, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8363/2000001 [1:27:24<427:54:01,  1.29it/s]

buffer size = 17126, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8364/2000001 [1:27:25<434:10:41,  1.27it/s]

buffer size = 17128, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8365/2000001 [1:27:26<434:58:51,  1.27it/s]

buffer size = 17130, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8366/2000001 [1:27:27<442:46:11,  1.25it/s]

buffer size = 17132, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8367/2000001 [1:27:28<447:23:39,  1.24it/s]

buffer size = 17134, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8368/2000001 [1:27:28<442:31:03,  1.25it/s]

buffer size = 17136, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8369/2000001 [1:27:29<440:37:28,  1.26it/s]

buffer size = 17138, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8370/2000001 [1:27:30<435:13:41,  1.27it/s]

buffer size = 17140, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8371/2000001 [1:27:31<435:03:29,  1.27it/s]

buffer size = 17142, epsilon = 0.09582
mean_reward :  0.0


  0%|          | 8372/2000001 [1:27:31<434:22:21,  1.27it/s]

buffer size = 17144, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8373/2000001 [1:27:32<432:13:44,  1.28it/s]

buffer size = 17146, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8374/2000001 [1:27:33<425:58:50,  1.30it/s]

buffer size = 17148, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8375/2000001 [1:27:34<419:05:35,  1.32it/s]

buffer size = 17150, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8376/2000001 [1:27:34<411:29:37,  1.34it/s]

buffer size = 17152, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8377/2000001 [1:27:35<406:10:26,  1.36it/s]

buffer size = 17154, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8378/2000001 [1:27:36<401:37:36,  1.38it/s]

buffer size = 17156, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8379/2000001 [1:27:37<405:39:31,  1.36it/s]

buffer size = 17158, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8380/2000001 [1:27:37<404:06:43,  1.37it/s]

buffer size = 17160, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8381/2000001 [1:27:38<422:27:00,  1.31it/s]

buffer size = 17162, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8382/2000001 [1:27:39<424:00:37,  1.30it/s]

buffer size = 17164, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8383/2000001 [1:27:40<426:33:35,  1.30it/s]

buffer size = 17166, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8384/2000001 [1:27:41<440:35:15,  1.26it/s]

buffer size = 17168, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8385/2000001 [1:27:41<452:43:28,  1.22it/s]

buffer size = 17170, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8386/2000001 [1:27:42<440:39:11,  1.26it/s]

buffer size = 17172, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8387/2000001 [1:27:43<435:00:27,  1.27it/s]

buffer size = 17174, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8388/2000001 [1:27:44<431:05:57,  1.28it/s]

buffer size = 17176, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8389/2000001 [1:27:44<425:54:12,  1.30it/s]

buffer size = 17178, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8390/2000001 [1:27:45<424:31:30,  1.30it/s]

buffer size = 17180, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8391/2000001 [1:27:46<424:34:35,  1.30it/s]

buffer size = 17182, epsilon = 0.09581
mean_reward :  0.0


  0%|          | 8392/2000001 [1:27:47<419:27:18,  1.32it/s]

buffer size = 17184, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8393/2000001 [1:27:47<413:19:09,  1.34it/s]

buffer size = 17186, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8394/2000001 [1:27:48<411:25:27,  1.34it/s]

buffer size = 17188, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8395/2000001 [1:27:49<406:07:06,  1.36it/s]

buffer size = 17190, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8396/2000001 [1:27:50<404:02:56,  1.37it/s]

buffer size = 17192, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8397/2000001 [1:27:50<397:45:24,  1.39it/s]

buffer size = 17194, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8398/2000001 [1:27:51<398:40:59,  1.39it/s]

buffer size = 17196, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8399/2000001 [1:27:52<419:40:45,  1.32it/s]

buffer size = 17198, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8400/2000001 [1:27:53<452:18:16,  1.22it/s]

buffer size = 17200, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8401/2000001 [1:27:54<470:46:10,  1.18it/s]

buffer size = 17202, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8402/2000001 [1:27:55<483:52:20,  1.14it/s]

buffer size = 17204, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8403/2000001 [1:27:55<464:36:57,  1.19it/s]

buffer size = 17206, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8404/2000001 [1:27:56<446:44:03,  1.24it/s]

buffer size = 17208, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8405/2000001 [1:27:57<440:22:15,  1.26it/s]

buffer size = 17210, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8406/2000001 [1:27:58<432:05:40,  1.28it/s]

buffer size = 17212, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8407/2000001 [1:27:58<425:35:22,  1.30it/s]

buffer size = 17214, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8408/2000001 [1:27:59<415:41:03,  1.33it/s]

buffer size = 17216, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8409/2000001 [1:28:00<411:23:06,  1.34it/s]

buffer size = 17218, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8410/2000001 [1:28:01<408:46:18,  1.35it/s]

buffer size = 17220, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8411/2000001 [1:28:01<408:37:00,  1.35it/s]

buffer size = 17222, epsilon = 0.09580
mean_reward :  0.0


  0%|          | 8412/2000001 [1:28:02<407:28:01,  1.36it/s]

buffer size = 17224, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8413/2000001 [1:28:03<404:34:37,  1.37it/s]

buffer size = 17226, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8414/2000001 [1:28:04<398:29:45,  1.39it/s]

buffer size = 17228, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8415/2000001 [1:28:04<398:58:49,  1.39it/s]

buffer size = 17230, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8416/2000001 [1:28:05<408:48:26,  1.35it/s]

buffer size = 17232, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8417/2000001 [1:28:06<417:58:13,  1.32it/s]

buffer size = 17234, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8418/2000001 [1:28:07<430:00:56,  1.29it/s]

buffer size = 17236, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8419/2000001 [1:28:07<439:38:18,  1.26it/s]

buffer size = 17238, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8420/2000001 [1:28:08<438:26:46,  1.26it/s]

buffer size = 17240, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8421/2000001 [1:28:09<443:35:16,  1.25it/s]

buffer size = 17242, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8422/2000001 [1:28:10<435:02:01,  1.27it/s]

buffer size = 17244, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8423/2000001 [1:28:11<424:59:06,  1.30it/s]

buffer size = 17246, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8424/2000001 [1:28:11<419:17:08,  1.32it/s]

buffer size = 17248, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8425/2000001 [1:28:12<413:52:26,  1.34it/s]

buffer size = 17250, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8426/2000001 [1:28:13<416:03:10,  1.33it/s]

buffer size = 17252, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8427/2000001 [1:28:14<416:42:14,  1.33it/s]

buffer size = 17254, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8428/2000001 [1:28:14<413:05:09,  1.34it/s]

buffer size = 17256, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8429/2000001 [1:28:15<414:48:31,  1.33it/s]

buffer size = 17258, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8430/2000001 [1:28:16<415:09:08,  1.33it/s]

buffer size = 17260, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8431/2000001 [1:28:16<409:57:09,  1.35it/s]

buffer size = 17262, epsilon = 0.09579
mean_reward :  0.0


  0%|          | 8432/2000001 [1:28:17<411:43:45,  1.34it/s]

buffer size = 17264, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8433/2000001 [1:28:18<410:02:22,  1.35it/s]

buffer size = 17266, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8434/2000001 [1:28:19<403:31:10,  1.37it/s]

buffer size = 17268, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8435/2000001 [1:28:20<419:57:19,  1.32it/s]

buffer size = 17270, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8436/2000001 [1:28:20<424:38:43,  1.30it/s]

buffer size = 17272, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8437/2000001 [1:28:21<427:36:25,  1.29it/s]

buffer size = 17274, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8438/2000001 [1:28:22<442:45:01,  1.25it/s]

buffer size = 17276, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8439/2000001 [1:28:23<452:48:01,  1.22it/s]

buffer size = 17278, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8440/2000001 [1:28:24<440:13:57,  1.26it/s]

buffer size = 17280, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8441/2000001 [1:28:24<432:42:21,  1.28it/s]

buffer size = 17282, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8442/2000001 [1:28:25<425:15:57,  1.30it/s]

buffer size = 17284, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8443/2000001 [1:28:26<427:37:01,  1.29it/s]

buffer size = 17286, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8444/2000001 [1:28:27<428:51:39,  1.29it/s]

buffer size = 17288, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8445/2000001 [1:28:27<429:30:23,  1.29it/s]

buffer size = 17290, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8446/2000001 [1:28:28<421:24:21,  1.31it/s]

buffer size = 17292, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8447/2000001 [1:28:29<419:19:41,  1.32it/s]

buffer size = 17294, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8448/2000001 [1:28:30<416:37:37,  1.33it/s]

buffer size = 17296, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8449/2000001 [1:28:30<411:28:50,  1.34it/s]

buffer size = 17298, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8450/2000001 [1:28:31<407:49:31,  1.36it/s]

buffer size = 17300, epsilon = 0.09578
mean_reward :  0.0


  0%|          | 8451/2000001 [1:28:32<403:35:23,  1.37it/s]

buffer size = 17302, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8452/2000001 [1:28:32<399:23:53,  1.39it/s]

buffer size = 17304, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8453/2000001 [1:28:33<414:53:33,  1.33it/s]

buffer size = 17306, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8454/2000001 [1:28:34<444:41:31,  1.24it/s]

buffer size = 17308, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8455/2000001 [1:28:35<462:54:17,  1.20it/s]

buffer size = 17310, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8456/2000001 [1:28:36<482:04:32,  1.15it/s]

buffer size = 17312, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8457/2000001 [1:28:37<465:48:21,  1.19it/s]

buffer size = 17314, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8458/2000001 [1:28:38<447:09:40,  1.24it/s]

buffer size = 17316, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8459/2000001 [1:28:38<435:49:06,  1.27it/s]

buffer size = 17318, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8460/2000001 [1:28:39<433:41:15,  1.28it/s]

buffer size = 17320, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8461/2000001 [1:28:40<424:10:23,  1.30it/s]

buffer size = 17322, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8462/2000001 [1:28:41<422:10:00,  1.31it/s]

buffer size = 17324, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8463/2000001 [1:28:41<420:39:31,  1.32it/s]

buffer size = 17326, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8464/2000001 [1:28:42<414:15:30,  1.34it/s]

buffer size = 17328, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8465/2000001 [1:28:43<409:41:09,  1.35it/s]

buffer size = 17330, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8466/2000001 [1:28:44<411:09:48,  1.35it/s]

buffer size = 17332, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8467/2000001 [1:28:44<404:40:38,  1.37it/s]

buffer size = 17334, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8468/2000001 [1:28:45<402:12:10,  1.38it/s]

buffer size = 17336, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8469/2000001 [1:28:46<402:34:17,  1.37it/s]

buffer size = 17338, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8470/2000001 [1:28:46<418:21:49,  1.32it/s]

buffer size = 17340, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8471/2000001 [1:28:47<432:01:41,  1.28it/s]

buffer size = 17342, epsilon = 0.09577
mean_reward :  0.0


  0%|          | 8472/2000001 [1:28:48<430:40:28,  1.28it/s]

buffer size = 17344, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8473/2000001 [1:28:49<436:33:49,  1.27it/s]

buffer size = 17346, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8474/2000001 [1:28:50<458:45:48,  1.21it/s]

buffer size = 17348, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8475/2000001 [1:28:51<445:36:57,  1.24it/s]

buffer size = 17350, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8476/2000001 [1:28:51<445:09:22,  1.24it/s]

buffer size = 17352, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8477/2000001 [1:28:52<441:28:21,  1.25it/s]

buffer size = 17354, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8478/2000001 [1:28:53<438:07:38,  1.26it/s]

buffer size = 17356, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8479/2000001 [1:28:54<430:59:07,  1.28it/s]

buffer size = 17358, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8480/2000001 [1:28:54<430:18:32,  1.29it/s]

buffer size = 17360, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8481/2000001 [1:28:55<429:36:28,  1.29it/s]

buffer size = 17362, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8482/2000001 [1:28:56<424:13:37,  1.30it/s]

buffer size = 17364, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8483/2000001 [1:28:57<414:41:28,  1.33it/s]

buffer size = 17366, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8484/2000001 [1:28:57<410:12:15,  1.35it/s]

buffer size = 17368, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8485/2000001 [1:28:58<407:53:38,  1.36it/s]

buffer size = 17370, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8486/2000001 [1:28:59<404:57:18,  1.37it/s]

buffer size = 17372, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8487/2000001 [1:29:00<402:25:48,  1.37it/s]

buffer size = 17374, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8488/2000001 [1:29:00<412:46:31,  1.34it/s]

buffer size = 17376, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8489/2000001 [1:29:01<423:24:01,  1.31it/s]

buffer size = 17378, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8490/2000001 [1:29:02<430:10:49,  1.29it/s]

buffer size = 17380, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8491/2000001 [1:29:03<442:28:27,  1.25it/s]

buffer size = 17382, epsilon = 0.09576
mean_reward :  0.0


  0%|          | 8492/2000001 [1:29:04<454:09:09,  1.22it/s]

buffer size = 17384, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8493/2000001 [1:29:05<452:10:28,  1.22it/s]

buffer size = 17386, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8494/2000001 [1:29:05<452:38:34,  1.22it/s]

buffer size = 17388, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8495/2000001 [1:29:06<446:47:25,  1.24it/s]

buffer size = 17390, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8496/2000001 [1:29:07<443:02:04,  1.25it/s]

buffer size = 17392, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8497/2000001 [1:29:08<445:59:43,  1.24it/s]

buffer size = 17394, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8498/2000001 [1:29:09<442:13:20,  1.25it/s]

buffer size = 17396, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8499/2000001 [1:29:09<436:22:01,  1.27it/s]

buffer size = 17398, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8500/2000001 [1:29:10<431:14:55,  1.28it/s]

buffer size = 17400, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8501/2000001 [1:29:11<417:42:02,  1.32it/s]

buffer size = 17402, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8502/2000001 [1:29:11<413:25:58,  1.34it/s]

buffer size = 17404, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8503/2000001 [1:29:12<407:42:21,  1.36it/s]

buffer size = 17406, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8504/2000001 [1:29:13<406:36:39,  1.36it/s]

buffer size = 17408, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8505/2000001 [1:29:14<406:56:53,  1.36it/s]

buffer size = 17410, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8506/2000001 [1:29:14<413:41:46,  1.34it/s]

buffer size = 17412, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8507/2000001 [1:29:15<429:43:26,  1.29it/s]

buffer size = 17414, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8508/2000001 [1:29:16<427:42:52,  1.29it/s]

buffer size = 17416, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8509/2000001 [1:29:17<444:42:56,  1.24it/s]

buffer size = 17418, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8510/2000001 [1:29:18<448:28:11,  1.23it/s]

buffer size = 17420, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8511/2000001 [1:29:18<438:19:48,  1.26it/s]

buffer size = 17422, epsilon = 0.09575
mean_reward :  0.0


  0%|          | 8512/2000001 [1:29:19<428:30:12,  1.29it/s]

buffer size = 17424, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8513/2000001 [1:29:20<433:07:22,  1.28it/s]

buffer size = 17426, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8514/2000001 [1:29:21<430:53:14,  1.28it/s]

buffer size = 17428, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8515/2000001 [1:29:22<422:07:29,  1.31it/s]

buffer size = 17430, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8516/2000001 [1:29:22<419:16:09,  1.32it/s]

buffer size = 17432, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8517/2000001 [1:29:23<419:46:51,  1.32it/s]

buffer size = 17434, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8518/2000001 [1:29:24<417:33:23,  1.32it/s]

buffer size = 17436, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8519/2000001 [1:29:24<413:01:36,  1.34it/s]

buffer size = 17438, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8520/2000001 [1:29:25<412:46:22,  1.34it/s]

buffer size = 17440, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8521/2000001 [1:29:26<412:02:58,  1.34it/s]

buffer size = 17442, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8522/2000001 [1:29:27<431:21:56,  1.28it/s]

buffer size = 17444, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8523/2000001 [1:29:28<455:45:11,  1.21it/s]

buffer size = 17446, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8524/2000001 [1:29:29<459:48:12,  1.20it/s]

buffer size = 17448, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8525/2000001 [1:29:29<449:45:08,  1.23it/s]

buffer size = 17450, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8526/2000001 [1:29:30<443:21:29,  1.25it/s]

buffer size = 17452, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8527/2000001 [1:29:31<447:41:31,  1.24it/s]

buffer size = 17454, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8528/2000001 [1:29:32<440:05:08,  1.26it/s]

buffer size = 17456, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8529/2000001 [1:29:33<432:26:05,  1.28it/s]

buffer size = 17458, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8530/2000001 [1:29:33<422:56:52,  1.31it/s]

buffer size = 17460, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8531/2000001 [1:29:34<419:01:42,  1.32it/s]

buffer size = 17462, epsilon = 0.09574
mean_reward :  0.0


  0%|          | 8532/2000001 [1:29:35<417:40:49,  1.32it/s]

buffer size = 17464, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8533/2000001 [1:29:35<413:19:58,  1.34it/s]

buffer size = 17466, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8534/2000001 [1:29:36<413:15:12,  1.34it/s]

buffer size = 17468, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8535/2000001 [1:29:37<413:09:46,  1.34it/s]

buffer size = 17470, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8536/2000001 [1:29:38<413:45:37,  1.34it/s]

buffer size = 17472, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8537/2000001 [1:29:38<414:20:50,  1.34it/s]

buffer size = 17474, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8538/2000001 [1:29:39<419:54:49,  1.32it/s]

buffer size = 17476, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8539/2000001 [1:29:40<420:10:41,  1.32it/s]

buffer size = 17478, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8540/2000001 [1:29:41<416:58:08,  1.33it/s]

buffer size = 17480, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8541/2000001 [1:29:41<415:55:29,  1.33it/s]

buffer size = 17482, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8542/2000001 [1:29:42<436:07:07,  1.27it/s]

buffer size = 17484, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8543/2000001 [1:29:43<434:54:10,  1.27it/s]

buffer size = 17486, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8544/2000001 [1:29:44<432:00:19,  1.28it/s]

buffer size = 17488, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8545/2000001 [1:29:45<437:56:23,  1.26it/s]

buffer size = 17490, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8546/2000001 [1:29:46<447:34:51,  1.24it/s]

buffer size = 17492, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8547/2000001 [1:29:46<439:21:13,  1.26it/s]

buffer size = 17494, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8548/2000001 [1:29:47<436:10:30,  1.27it/s]

buffer size = 17496, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8549/2000001 [1:29:48<431:59:19,  1.28it/s]

buffer size = 17498, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8550/2000001 [1:29:49<425:49:43,  1.30it/s]

buffer size = 17500, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8551/2000001 [1:29:49<426:25:47,  1.30it/s]

buffer size = 17502, epsilon = 0.09573
mean_reward :  0.0


  0%|          | 8552/2000001 [1:29:50<421:47:51,  1.31it/s]

buffer size = 17504, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8553/2000001 [1:29:51<417:11:36,  1.33it/s]

buffer size = 17506, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8554/2000001 [1:29:52<413:48:33,  1.34it/s]

buffer size = 17508, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8555/2000001 [1:29:52<419:05:13,  1.32it/s]

buffer size = 17510, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8556/2000001 [1:29:53<424:22:19,  1.30it/s]

buffer size = 17512, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8557/2000001 [1:29:54<421:23:53,  1.31it/s]

buffer size = 17514, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8558/2000001 [1:29:55<420:50:18,  1.31it/s]

buffer size = 17516, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8559/2000001 [1:29:55<424:07:03,  1.30it/s]

buffer size = 17518, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8560/2000001 [1:29:56<433:51:40,  1.28it/s]

buffer size = 17520, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8561/2000001 [1:29:57<448:17:10,  1.23it/s]

buffer size = 17522, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8562/2000001 [1:29:58<449:13:03,  1.23it/s]

buffer size = 17524, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8563/2000001 [1:29:59<465:25:34,  1.19it/s]

buffer size = 17526, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8564/2000001 [1:30:00<464:41:24,  1.19it/s]

buffer size = 17528, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8565/2000001 [1:30:00<452:31:14,  1.22it/s]

buffer size = 17530, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8566/2000001 [1:30:01<438:51:32,  1.26it/s]

buffer size = 17532, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8567/2000001 [1:30:02<434:54:23,  1.27it/s]

buffer size = 17534, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8568/2000001 [1:30:03<429:04:56,  1.29it/s]

buffer size = 17536, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8569/2000001 [1:30:03<426:56:35,  1.30it/s]

buffer size = 17538, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8570/2000001 [1:30:04<424:50:57,  1.30it/s]

buffer size = 17540, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8571/2000001 [1:30:05<419:20:40,  1.32it/s]

buffer size = 17542, epsilon = 0.09572
mean_reward :  0.0


  0%|          | 8572/2000001 [1:30:06<416:32:37,  1.33it/s]

buffer size = 17544, epsilon = 0.09571
mean_reward :  0.0


  0%|          | 8573/2000001 [1:30:06<413:07:35,  1.34it/s]

buffer size = 17546, epsilon = 0.09571
mean_reward :  0.0


  0%|          | 8574/2000001 [1:30:07<415:34:41,  1.33it/s]

buffer size = 17548, epsilon = 0.09571
mean_reward :  0.0
