In [1]:
import pygame  # helps us make GUI games in python
import random  # help us define which direction the ball will start moving in

from pygame.transform import scale

# DQN. CNN reads in pixel data.
# reinforcement learning. trial and error.
# maximize action based on reward
# agent environment loop
# this is called Q Learning
# based on just game state. mapping of state to action is policy
# experience replay. learns from past policies


# frame rate per second
FPS = 60

# size of our window
WINDOW_WIDTH = 400
WINDOW_HEIGHT = 400

# size of our paddle
PADDLE_WIDTH = 10
PADDLE_HEIGHT = 60
# distance from the edge of the window
PADDLE_BUFFER = 10

# size of our ball
BALL_WIDTH = 10
BALL_HEIGHT = 10

# speeds of our paddle and ball
PADDLE_SPEED = 2
BALL_X_SPEED = 3
BALL_Y_SPEED = 2

# RGB colors for our paddle and ball
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)


# initialize our screen using width and height vars
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))


def drawBall(ballXPos, ballYPos):
    ball = pygame.Rect(ballXPos, ballYPos, BALL_WIDTH, BALL_HEIGHT)
    pygame.draw.rect(screen, WHITE, ball)


# Paddle 1 is our learning agent/us
# draw to the left of the screen
def drawPaddle1(paddle1YPos):
    paddle1 = pygame.Rect(PADDLE_BUFFER,
                          paddle1YPos,
                          PADDLE_WIDTH,
                          PADDLE_HEIGHT)
    pygame.draw.rect(screen, WHITE, paddle1)


# paddle 2 is the evil AI
# draw to the right of the screen
def drawPaddle2(paddle2YPos):
    paddle2 = pygame.Rect(WINDOW_WIDTH - PADDLE_BUFFER - PADDLE_WIDTH,
                          paddle2YPos,
                          PADDLE_WIDTH,
                          PADDLE_HEIGHT)
    pygame.draw.rect(screen, WHITE, paddle2)


# update the ball, using the paddle posistions the balls positions and the
# balls directions
def updateBall(paddle1YPos, paddle2YPos,
               ballXPos, ballYPos,
               ballXDirection, ballYDirection):

    # update the x and y position
    ballXPos = ballXPos + ballXDirection * BALL_X_SPEED
    ballYPos = ballYPos + ballYDirection * BALL_Y_SPEED
    score = 0

    # checks for a collision, if the ball hits the left side,
    # our learning agent
    if (ballXPos <= PADDLE_BUFFER + PADDLE_WIDTH and
        ballYPos + BALL_HEIGHT >= paddle1YPos and
        ballYPos - BALL_HEIGHT <= paddle1YPos + PADDLE_HEIGHT):
        # switches directions
        ballXDirection = 1
    # past it
    elif (ballXPos <= 0):
        # negative score
        ballXDirection = 1
        score = -1
        return [score, paddle1YPos, paddle2YPos,
                ballXPos, ballYPos,
                ballXDirection, ballYDirection]

    # check if hits the other side
    if (ballXPos >= WINDOW_WIDTH - PADDLE_WIDTH - PADDLE_BUFFER and
        ballYPos + BALL_HEIGHT >= paddle2YPos and
        ballYPos - BALL_HEIGHT <= paddle2YPos + PADDLE_HEIGHT):
        # switch directions
        ballXDirection = -1
    # past it
    elif (ballXPos >= WINDOW_WIDTH - BALL_WIDTH):
        # positive score
        ballXDirection = -1
        score = 1
        return [score, paddle1YPos, paddle2YPos,
                ballXPos, ballYPos,
                ballXDirection, ballYDirection]

    # if it hits the top, move down
    if (ballYPos <= 0):
        ballYPos = 0
        ballYDirection = 1
    # if it hits the bottom, move up
    elif (ballYPos >= WINDOW_HEIGHT - BALL_HEIGHT):
        ballYPos = WINDOW_HEIGHT - BALL_HEIGHT
        ballYDirection = -1
    return [score, paddle1YPos, paddle2YPos,
            ballXPos, ballYPos,
            ballXDirection, ballYDirection]


# update our paddle position
# this is controlled by the action input
def updatePaddle1(action, paddle1YPos):
    # if move up
    if (action[1] == 1):
        paddle1YPos = paddle1YPos - PADDLE_SPEED
    # if move down
    if (action[2] == 1):
        paddle1YPos = paddle1YPos + PADDLE_SPEED

    # don't let it move off the screen
    if (paddle1YPos < 0):
        paddle1YPos = 0
    if (paddle1YPos > WINDOW_HEIGHT - PADDLE_HEIGHT):
        paddle1YPos = WINDOW_HEIGHT - PADDLE_HEIGHT
    return paddle1YPos

# update evil AI paddle position
# this is controlled by the position of the ball
def updatePaddle2(paddle2YPos, ballYPos):
    # move down if ball is in upper half
    if (paddle2YPos + PADDLE_HEIGHT / 2 < ballYPos + BALL_HEIGHT / 2):
        paddle2YPos = paddle2YPos + PADDLE_SPEED
    # move up if ball is in lower half
    if (paddle2YPos + PADDLE_HEIGHT / 2 > ballYPos + BALL_HEIGHT / 2):
        paddle2YPos = paddle2YPos - PADDLE_SPEED
    # don't let it hit top
    if (paddle2YPos < 0):
        paddle2YPos = 0
    # dont let it hit bottom
    if (paddle2YPos > WINDOW_HEIGHT - PADDLE_HEIGHT):
        paddle2YPos = WINDOW_HEIGHT - PADDLE_HEIGHT
    return paddle2YPos


# game class
class PongGame:
    def __init__(self):
        # random number for initial direction of ball
        num = random.randint(0, 9)
        # keep score
        self.tally = 0
        # initialie positions of paddle
        self.paddle1YPos = WINDOW_HEIGHT / 2 - PADDLE_HEIGHT / 2
        self.paddle2YPos = WINDOW_HEIGHT / 2 - PADDLE_HEIGHT / 2
        # and ball direction
        self.ballXDirection = 1
        self.ballYDirection = 1
        # starting point
        self.ballXPos = WINDOW_WIDTH / 2 - BALL_WIDTH / 2

        # randomly decide where the ball will move
        if(0 < num < 3):
            self.ballXDirection = 1
            self.ballYDirection = 1
        if (3 <= num < 5):
            self.ballXDirection = -1
            self.ballYDirection = 1
        if (5 <= num < 8):
            self.ballXDirection = 1
            self.ballYDirection = -1
        if (8 <= num < 10):
            self.ballXDirection = -1
            self.ballYDirection = -1
        # new random number
        num = random.randint(0, 9)
        # where it will start, y part
        self.ballYPos = num * (WINDOW_HEIGHT - BALL_HEIGHT) / 9
        # scaled surface
        self.scaled_surface = pygame.Surface((84, 84), depth=32)


    def getPresentFrame(self):
        # for each frame, calls the event queue, like if the main window needs
        # to be repainted
        pygame.event.pump()
        # make the background black
        screen.fill(BLACK)
        # draw our paddles
        drawPaddle1(self.paddle1YPos)
        drawPaddle2(self.paddle2YPos)
        # draw our ball
        drawBall(self.ballXPos, self.ballYPos)
        # updates the window
        pygame.display.flip()

        # copies the pixels from our surface to a 3D array. we'll use this for
        # RL
        pygame.transform.scale(pygame.display.get_surface(), (84, 84), self.scaled_surface)
        image_data = pygame.surfarray.array2d(self.scaled_surface)
        # return our surface data
        return image_data


    def getNextFrame(self, action):
        pygame.event.pump()
        score = 0
        screen.fill(BLACK)
        
        # update our paddle
        self.paddle1YPos = updatePaddle1(action, self.paddle1YPos)
        drawPaddle1(self.paddle1YPos)

        # update evil AI paddle
        self.paddle2YPos = updatePaddle2(self.paddle2YPos, self.ballYPos)
        drawPaddle2(self.paddle2YPos)
        
        # update our vars by updating ball position
        [score, self.paddle1YPos, self.paddle2YPos, self.ballXPos,
         self.ballYPos, self.ballXDirection, self.ballYDirection] = updateBall(self.paddle1YPos, self.paddle2YPos, self.ballXPos,
                                                                               self.ballYPos, self.ballXDirection, self.ballYDirection)  # draw the ball
        drawBall(self.ballXPos, self.ballYPos)
        
        # get the surface data
        # copies the pixels from our surface to a 3D array. we'll use this for
        # RL
        pygame.transform.scale(pygame.display.get_surface(), (84, 84), self.scaled_surface)
        image_data = pygame.surfarray.array2d(self.scaled_surface)
        # return our surface data
        # update the window
        pygame.display.flip()
        # record the total score
        self.tally = self.tally + score
        print("Tally is " + str(self.tally))
        # return the score and the surface data
        return [score, image_data]

pygame 1.9.5
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
import tensorflow as tf
#import pong  # our class
import numpy as np  # math
import random  # random
# queue data structure. fast appends. and pops. replay memory
from collections import deque


# hyper params
ACTIONS = 3  # up, down, stay
# define our learning rate
GAMMA = 0.99
# for updating our gradient or training over time
INITIAL_EPSILON = 1.0
FINAL_EPSILON = 0.05
# how many frames to anneal epsilon
EXPLORE = 500000
OBSERVE = 50000
# store our experiences, the size of it
REPLAY_MEMORY = 500000
# batch size to train on
BATCH = 100
# input image size in pixels
INPUT_SIZE = 84

# create tensorflow graph
def createGraph():

    # CNN
    # creates an empty tensor with all elements set to zero with a shape
    W_conv1 = tf.Variable(tf.random_normal([8, 8, 4, 32]))
    b_conv1 = tf.Variable(tf.random_normal([32]))

    W_conv2 = tf.Variable(tf.random_normal([4, 4, 32, 64]))
    b_conv2 = tf.Variable(tf.random_normal([64]))

    W_conv3 = tf.Variable(tf.random_normal([3, 3, 64, 64]))
    b_conv3 = tf.Variable(tf.random_normal([64]))

    W_fc4 = tf.Variable(tf.random_normal([7 * 7 * 64, 784]))  # image size 7x7 due to convolutions
    b_fc4 = tf.Variable(tf.random_normal([784]))

    W_fc5 = tf.Variable(tf.random_normal([784, ACTIONS]))
    b_fc5 = tf.Variable(tf.random_normal([ACTIONS]))

    # input for pixel data
    s = tf.placeholder("float", [None, INPUT_SIZE, INPUT_SIZE, 4])

    # Computes rectified linear unit activation fucntion on  a 2-D convolution
    # given 4-D input and filter tensors. and
    conv1 = tf.nn.relu(
        tf.nn.conv2d(s, W_conv1, strides=[1, 4, 4, 1], padding="VALID") + b_conv1)

    conv2 = tf.nn.relu(
        tf.nn.conv2d(conv1, W_conv2, strides=[1, 2, 2, 1], padding="VALID") + b_conv2)

    conv3 = tf.nn.relu(
        tf.nn.conv2d(conv2, W_conv3, strides=[1, 1, 1, 1], padding="VALID") + b_conv3)

    conv3_flat = tf.reshape(conv3, [-1, 7 * 7 * 64])

    fc4 = tf.nn.relu(tf.matmul(conv3_flat, W_fc4) + b_fc4)

    fc5 = tf.matmul(fc4, W_fc5) + b_fc5

    # return input and output to the network
    return s, fc5



# deep q network. feed in pixel data to graph session
def trainGraph(inp, out, sess):

    # to calculate the argmax, we multiply the predicted output with a vector
    # with one value 1 and rest as 0
    argmax = tf.placeholder("float", [None, ACTIONS])
    gt = tf.placeholder("float", [None])  # ground truth

    # action
    action = tf.reduce_sum(tf.multiply(out, argmax), reduction_indices=1)
    # cost function we will reduce through backpropagation
    cost = tf.reduce_mean(tf.square(action - gt))
    # optimization function to reduce our minimize our cost function
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # initialize our game
    game = PongGame()

    # create a queue for experience replay to store policies
    D = deque()

    # intial frame
    frame = game.getPresentFrame()
    # convert rgb to gray scale for processing

    # stack frames, that is our input tensor
    inp_t = np.stack((frame, frame, frame, frame), axis=2)

    # saver
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())

    t = 0
    epsilon = INITIAL_EPSILON

    # training time
    while(1):
        # output tensor
        out_t = out.eval(feed_dict={inp: [inp_t]})[0]
        # argmax function
        argmax_t = np.zeros([ACTIONS])

        # random action with prob epsilon
        if(random.random() <= epsilon):
            maxIndex = random.randrange(ACTIONS)
        # predicted action with prob (1 - epsilon)
        else:
            maxIndex = np.argmax(out_t)
        argmax_t[maxIndex] = 1

        if epsilon > FINAL_EPSILON:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # reward tensor if score is positive
        reward_t, frame = game.getNextFrame(argmax_t)

        frame = np.reshape(frame, (INPUT_SIZE, INPUT_SIZE, 1))
        
        # new input tensor
        inp_t1 = np.append(frame, inp_t[:, :, 0:3], axis=2)

        # add our input tensor, argmax tensor, reward and updated input tensor
        # to stack of experiences
        D.append((inp_t, argmax_t, reward_t, inp_t1))

        # if we run out of replay memory, make room
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # training iteration
        if t > OBSERVE:

            # get values from our replay memory
            minibatch = random.sample(D, BATCH)

            inp_batch = [d[0] for d in minibatch]
            argmax_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            inp_t1_batch = [d[3] for d in minibatch]

            gt_batch = []
            out_batch = out.eval(feed_dict={inp: inp_t1_batch})

            # add values to our batch
            for i in range(0, len(minibatch)):
                gt_batch.append(reward_batch[i] + GAMMA * np.max(out_batch[i]))

            # train on that
            train_step.run(feed_dict={
                           gt: gt_batch,
                           argmax: argmax_batch,
                           inp: inp_batch
                           })

        # update our input tensor the the next frame
        inp_t = inp_t1
        t = t+1

        # print our where wer are after saving where we are
        if t % 10000 == 0:
            saver.save(sess, './' + 'pong' + '-dqn', global_step=t)

        print("TIMESTEP", t, "/ EPSILON", epsilon, "/ ACTION", maxIndex,
              "/ REWARD", reward_t, "/ Q_MAX %e" % np.max(out_t))


def main():
    # create session
    sess = tf.InteractiveSession()
    # input layer and output layer by creating graph
    inp, out = createGraph()
    # train our graph on input and output with session variables
    trainGraph(inp, out, sess)

if __name__ == "__main__":
    main()

Tally is 0
TIMESTEP 1 / EPSILON 0.9999981 / ACTION 0 / REWARD 0 / Q_MAX -1.096599e+12
Tally is 0
TIMESTEP 2 / EPSILON 0.9999962 / ACTION 0 / REWARD 0 / Q_MAX -6.037113e+11
Tally is 0
TIMESTEP 3 / EPSILON 0.9999943 / ACTION 2 / REWARD 0 / Q_MAX -2.538163e+11
Tally is 0
TIMESTEP 4 / EPSILON 0.9999924 / ACTION 1 / REWARD 0 / Q_MAX 2.830074e+11
Tally is 0
TIMESTEP 5 / EPSILON 0.9999905 / ACTION 1 / REWARD 0 / Q_MAX -1.633174e+11
Tally is 0
TIMESTEP 6 / EPSILON 0.9999886 / ACTION 2 / REWARD 0 / Q_MAX 1.548097e+12
Tally is 0
TIMESTEP 7 / EPSILON 0.9999867 / ACTION 2 / REWARD 0 / Q_MAX 1.111687e+12
Tally is 0
TIMESTEP 8 / EPSILON 0.9999848 / ACTION 1 / REWARD 0 / Q_MAX 1.584131e+12
Tally is 0
TIMESTEP 9 / EPSILON 0.9999829 / ACTION 2 / REWARD 0 / Q_MAX 1.236410e+12
Tally is 0
TIMESTEP 10 / EPSILON 0.999981 / ACTION 1 / REWARD 0 / Q_MAX -1.052681e+12
Tally is 0
TIMESTEP 11 / EPSILON 0.9999791 / ACTION 0 / REWARD 0 / Q_MAX -1.912115e+12
Tally is 0
TIMESTEP 12 / EPSILON 0.9999772 / ACTION 0 / RE

TIMESTEP 118 / EPSILON 0.9997758000000001 / ACTION 2 / REWARD 0 / Q_MAX 7.418386e+11
Tally is -1
TIMESTEP 119 / EPSILON 0.9997739000000001 / ACTION 2 / REWARD 0 / Q_MAX 1.657658e+12
Tally is -1
TIMESTEP 120 / EPSILON 0.9997720000000001 / ACTION 2 / REWARD 0 / Q_MAX 6.696868e+11
Tally is -1
TIMESTEP 121 / EPSILON 0.9997701000000001 / ACTION 1 / REWARD 0 / Q_MAX 1.767578e+12
Tally is -1
TIMESTEP 122 / EPSILON 0.9997682000000001 / ACTION 0 / REWARD 0 / Q_MAX 2.705456e+11
Tally is -1
TIMESTEP 123 / EPSILON 0.9997663000000001 / ACTION 1 / REWARD 0 / Q_MAX 6.074151e+11
Tally is -1
TIMESTEP 124 / EPSILON 0.9997644000000001 / ACTION 2 / REWARD 0 / Q_MAX 4.862117e+11
Tally is -1
TIMESTEP 125 / EPSILON 0.9997625000000001 / ACTION 2 / REWARD 0 / Q_MAX 2.618701e+11
Tally is -1
TIMESTEP 126 / EPSILON 0.9997606000000001 / ACTION 1 / REWARD 0 / Q_MAX 1.764291e+12
Tally is -1
TIMESTEP 127 / EPSILON 0.9997587000000001 / ACTION 2 / REWARD 0 / Q_MAX 1.026607e+12
Tally is -1
TIMESTEP 128 / EPSILON 0.99975

Tally is -1
TIMESTEP 214 / EPSILON 0.9995934000000002 / ACTION 0 / REWARD 0 / Q_MAX -7.318311e+11
Tally is -1
TIMESTEP 215 / EPSILON 0.9995915000000002 / ACTION 0 / REWARD 0 / Q_MAX -1.840837e+12
Tally is -1
TIMESTEP 216 / EPSILON 0.9995896000000002 / ACTION 0 / REWARD 0 / Q_MAX -2.715413e+12
Tally is -1
TIMESTEP 217 / EPSILON 0.9995877000000002 / ACTION 0 / REWARD 0 / Q_MAX -1.941635e+12
Tally is -1
TIMESTEP 218 / EPSILON 0.9995858000000002 / ACTION 2 / REWARD 0 / Q_MAX -1.707902e+12
Tally is -1
TIMESTEP 219 / EPSILON 0.9995839000000002 / ACTION 0 / REWARD 0 / Q_MAX -1.751903e+12
Tally is -1
TIMESTEP 220 / EPSILON 0.9995820000000002 / ACTION 0 / REWARD 0 / Q_MAX -4.219571e+11
Tally is -1
TIMESTEP 221 / EPSILON 0.9995801000000002 / ACTION 1 / REWARD 0 / Q_MAX -2.784673e+12
Tally is -1
TIMESTEP 222 / EPSILON 0.9995782000000002 / ACTION 1 / REWARD 0 / Q_MAX 2.039669e+10
Tally is -1
TIMESTEP 223 / EPSILON 0.9995763000000002 / ACTION 2 / REWARD 0 / Q_MAX 6.233755e+11
Tally is -1
TIMESTEP 2

TIMESTEP 311 / EPSILON 0.9994091000000003 / ACTION 2 / REWARD 0 / Q_MAX 4.595987e+11
Tally is -1
TIMESTEP 312 / EPSILON 0.9994072000000003 / ACTION 2 / REWARD 0 / Q_MAX 6.575646e+11
Tally is -1
TIMESTEP 313 / EPSILON 0.9994053000000003 / ACTION 1 / REWARD 0 / Q_MAX 7.634502e+11
Tally is -1
TIMESTEP 314 / EPSILON 0.9994034000000003 / ACTION 2 / REWARD 0 / Q_MAX 2.543465e+11
Tally is -1
TIMESTEP 315 / EPSILON 0.9994015000000003 / ACTION 0 / REWARD 0 / Q_MAX 2.771326e+11
Tally is -1
TIMESTEP 316 / EPSILON 0.9993996000000003 / ACTION 1 / REWARD 0 / Q_MAX 3.084241e+11
Tally is -1
TIMESTEP 317 / EPSILON 0.9993977000000003 / ACTION 1 / REWARD 0 / Q_MAX 2.281467e+11
Tally is -1
TIMESTEP 318 / EPSILON 0.9993958000000003 / ACTION 1 / REWARD 0 / Q_MAX 5.366816e+11
Tally is -2
TIMESTEP 319 / EPSILON 0.9993939000000003 / ACTION 0 / REWARD -1 / Q_MAX 2.779084e+11
Tally is -2
TIMESTEP 320 / EPSILON 0.9993920000000003 / ACTION 0 / REWARD 0 / Q_MAX 6.840377e+10
Tally is -2
TIMESTEP 321 / EPSILON 0.9993

TIMESTEP 400 / EPSILON 0.9992400000000004 / ACTION 1 / REWARD 0 / Q_MAX 1.731099e+12
Tally is -2
TIMESTEP 401 / EPSILON 0.9992381000000004 / ACTION 2 / REWARD 0 / Q_MAX 1.960690e+12
Tally is -2
TIMESTEP 402 / EPSILON 0.9992362000000004 / ACTION 0 / REWARD 0 / Q_MAX 8.411783e+11
Tally is -2
TIMESTEP 403 / EPSILON 0.9992343000000004 / ACTION 1 / REWARD 0 / Q_MAX 6.496329e+11
Tally is -2
TIMESTEP 404 / EPSILON 0.9992324000000004 / ACTION 2 / REWARD 0 / Q_MAX 2.485543e+12
Tally is -2
TIMESTEP 405 / EPSILON 0.9992305000000004 / ACTION 2 / REWARD 0 / Q_MAX 2.690676e+12
Tally is -2
TIMESTEP 406 / EPSILON 0.9992286000000004 / ACTION 0 / REWARD 0 / Q_MAX 5.286725e+11
Tally is -2
TIMESTEP 407 / EPSILON 0.9992267000000004 / ACTION 0 / REWARD 0 / Q_MAX 7.848024e+11
Tally is -2
TIMESTEP 408 / EPSILON 0.9992248000000004 / ACTION 0 / REWARD 0 / Q_MAX 7.962672e+11
Tally is -2
TIMESTEP 409 / EPSILON 0.9992229000000004 / ACTION 0 / REWARD 0 / Q_MAX 1.989923e+12
Tally is -2
TIMESTEP 410 / EPSILON 0.99922

TIMESTEP 494 / EPSILON 0.9990614000000004 / ACTION 1 / REWARD 0 / Q_MAX 2.349583e+12
Tally is -2
TIMESTEP 495 / EPSILON 0.9990595000000004 / ACTION 2 / REWARD 0 / Q_MAX 1.812715e+12
Tally is -2
TIMESTEP 496 / EPSILON 0.9990576000000004 / ACTION 1 / REWARD 0 / Q_MAX 9.446541e+11
Tally is -2
TIMESTEP 497 / EPSILON 0.9990557000000004 / ACTION 1 / REWARD 0 / Q_MAX 1.057327e+12
Tally is -2
TIMESTEP 498 / EPSILON 0.9990538000000004 / ACTION 2 / REWARD 0 / Q_MAX 1.362893e+12
Tally is -2
TIMESTEP 499 / EPSILON 0.9990519000000004 / ACTION 1 / REWARD 0 / Q_MAX 8.366660e+11
Tally is -2
TIMESTEP 500 / EPSILON 0.9990500000000004 / ACTION 1 / REWARD 0 / Q_MAX 2.722764e+11
Tally is -2
TIMESTEP 501 / EPSILON 0.9990481000000004 / ACTION 0 / REWARD 0 / Q_MAX 2.216350e+11
Tally is -2
TIMESTEP 502 / EPSILON 0.9990462000000004 / ACTION 0 / REWARD 0 / Q_MAX 4.702650e+11
Tally is -2
TIMESTEP 503 / EPSILON 0.9990443000000004 / ACTION 1 / REWARD 0 / Q_MAX 4.967070e+11
Tally is -2
TIMESTEP 504 / EPSILON 0.99904

TIMESTEP 581 / EPSILON 0.9988961000000005 / ACTION 1 / REWARD 0 / Q_MAX 1.358194e+12
Tally is -3
TIMESTEP 582 / EPSILON 0.9988942000000005 / ACTION 0 / REWARD 0 / Q_MAX 1.676619e+12
Tally is -3
TIMESTEP 583 / EPSILON 0.9988923000000005 / ACTION 0 / REWARD 0 / Q_MAX 1.510320e+12
Tally is -3
TIMESTEP 584 / EPSILON 0.9988904000000005 / ACTION 1 / REWARD 0 / Q_MAX 1.701930e+12
Tally is -3
TIMESTEP 585 / EPSILON 0.9988885000000005 / ACTION 0 / REWARD 0 / Q_MAX 1.689574e+12
Tally is -3
TIMESTEP 586 / EPSILON 0.9988866000000005 / ACTION 2 / REWARD 0 / Q_MAX 2.321589e+12
Tally is -3
TIMESTEP 587 / EPSILON 0.9988847000000005 / ACTION 0 / REWARD 0 / Q_MAX 2.990175e+12
Tally is -3
TIMESTEP 588 / EPSILON 0.9988828000000005 / ACTION 1 / REWARD 0 / Q_MAX 2.875061e+12
Tally is -3
TIMESTEP 589 / EPSILON 0.9988809000000005 / ACTION 0 / REWARD 0 / Q_MAX 2.817757e+12
Tally is -3
TIMESTEP 590 / EPSILON 0.9988790000000005 / ACTION 1 / REWARD 0 / Q_MAX 1.349184e+12
Tally is -3
TIMESTEP 591 / EPSILON 0.99887

Tally is -3
TIMESTEP 674 / EPSILON 0.9987194000000006 / ACTION 0 / REWARD 0 / Q_MAX 2.764246e+12
Tally is -3
TIMESTEP 675 / EPSILON 0.9987175000000006 / ACTION 0 / REWARD 0 / Q_MAX 1.293179e+12
Tally is -3
TIMESTEP 676 / EPSILON 0.9987156000000006 / ACTION 2 / REWARD 0 / Q_MAX 1.492660e+12
Tally is -3
TIMESTEP 677 / EPSILON 0.9987137000000006 / ACTION 2 / REWARD 0 / Q_MAX 3.942700e+11
Tally is -3
TIMESTEP 678 / EPSILON 0.9987118000000006 / ACTION 0 / REWARD 0 / Q_MAX 5.208708e+11
Tally is -3
TIMESTEP 679 / EPSILON 0.9987099000000006 / ACTION 2 / REWARD 0 / Q_MAX 5.215343e+11
Tally is -3
TIMESTEP 680 / EPSILON 0.9987080000000006 / ACTION 1 / REWARD 0 / Q_MAX 4.961618e+11
Tally is -3
TIMESTEP 681 / EPSILON 0.9987061000000006 / ACTION 0 / REWARD 0 / Q_MAX 1.263752e+12
Tally is -3
TIMESTEP 682 / EPSILON 0.9987042000000006 / ACTION 2 / REWARD 0 / Q_MAX 1.263941e+12
Tally is -3
TIMESTEP 683 / EPSILON 0.9987023000000006 / ACTION 1 / REWARD 0 / Q_MAX 7.328417e+11
Tally is -3
TIMESTEP 684 / EPS

Tally is -3
TIMESTEP 787 / EPSILON 0.9985047000000007 / ACTION 0 / REWARD 0 / Q_MAX 4.698901e+12
Tally is -3
TIMESTEP 788 / EPSILON 0.9985028000000007 / ACTION 2 / REWARD 0 / Q_MAX 3.368034e+12
Tally is -3
TIMESTEP 789 / EPSILON 0.9985009000000007 / ACTION 2 / REWARD 0 / Q_MAX 3.798680e+12
Tally is -3
TIMESTEP 790 / EPSILON 0.9984990000000007 / ACTION 2 / REWARD 0 / Q_MAX 2.860705e+12
Tally is -3
TIMESTEP 791 / EPSILON 0.9984971000000007 / ACTION 1 / REWARD 0 / Q_MAX 2.922175e+12
Tally is -3
TIMESTEP 792 / EPSILON 0.9984952000000007 / ACTION 0 / REWARD 0 / Q_MAX 4.221881e+12
Tally is -3
TIMESTEP 793 / EPSILON 0.9984933000000007 / ACTION 0 / REWARD 0 / Q_MAX 3.470359e+12
Tally is -3
TIMESTEP 794 / EPSILON 0.9984914000000007 / ACTION 1 / REWARD 0 / Q_MAX 3.954442e+12
Tally is -3
TIMESTEP 795 / EPSILON 0.9984895000000007 / ACTION 2 / REWARD 0 / Q_MAX 3.477520e+12
Tally is -3
TIMESTEP 796 / EPSILON 0.9984876000000007 / ACTION 0 / REWARD 0 / Q_MAX 3.311849e+12
Tally is -3
TIMESTEP 797 / EPS

Tally is -3
TIMESTEP 872 / EPSILON 0.9983432000000008 / ACTION 1 / REWARD 0 / Q_MAX 3.944238e+12
Tally is -3
TIMESTEP 873 / EPSILON 0.9983413000000008 / ACTION 0 / REWARD 0 / Q_MAX 5.168869e+12
Tally is -3
TIMESTEP 874 / EPSILON 0.9983394000000008 / ACTION 2 / REWARD 0 / Q_MAX 4.247794e+12
Tally is -3
TIMESTEP 875 / EPSILON 0.9983375000000008 / ACTION 2 / REWARD 0 / Q_MAX 6.591957e+12
Tally is -3
TIMESTEP 876 / EPSILON 0.9983356000000008 / ACTION 0 / REWARD 0 / Q_MAX 5.827950e+12
Tally is -3
TIMESTEP 877 / EPSILON 0.9983337000000008 / ACTION 0 / REWARD 0 / Q_MAX 3.603904e+12
Tally is -3
TIMESTEP 878 / EPSILON 0.9983318000000008 / ACTION 0 / REWARD 0 / Q_MAX 5.076018e+12
Tally is -3
TIMESTEP 879 / EPSILON 0.9983299000000008 / ACTION 0 / REWARD 0 / Q_MAX 5.033187e+12
Tally is -3
TIMESTEP 880 / EPSILON 0.9983280000000008 / ACTION 1 / REWARD 0 / Q_MAX 3.641066e+12
Tally is -3
TIMESTEP 881 / EPSILON 0.9983261000000008 / ACTION 1 / REWARD 0 / Q_MAX 3.417646e+12
Tally is -3
TIMESTEP 882 / EPS

TIMESTEP 972 / EPSILON 0.9981532000000009 / ACTION 0 / REWARD 0 / Q_MAX -9.996138e+11
Tally is -3
TIMESTEP 973 / EPSILON 0.9981513000000009 / ACTION 0 / REWARD 0 / Q_MAX 7.573977e+11
Tally is -3
TIMESTEP 974 / EPSILON 0.9981494000000009 / ACTION 2 / REWARD 0 / Q_MAX 1.916959e+12
Tally is -3
TIMESTEP 975 / EPSILON 0.9981475000000009 / ACTION 0 / REWARD 0 / Q_MAX 2.377238e+12
Tally is -3
TIMESTEP 976 / EPSILON 0.9981456000000009 / ACTION 1 / REWARD 0 / Q_MAX 3.254317e+12
Tally is -3
TIMESTEP 977 / EPSILON 0.9981437000000009 / ACTION 2 / REWARD 0 / Q_MAX 2.341767e+12
Tally is -3
TIMESTEP 978 / EPSILON 0.9981418000000009 / ACTION 1 / REWARD 0 / Q_MAX 2.445304e+12
Tally is -3
TIMESTEP 979 / EPSILON 0.9981399000000009 / ACTION 1 / REWARD 0 / Q_MAX 1.041150e+12
Tally is -3
TIMESTEP 980 / EPSILON 0.9981380000000009 / ACTION 1 / REWARD 0 / Q_MAX 1.517872e+12
Tally is -3
TIMESTEP 981 / EPSILON 0.9981361000000009 / ACTION 1 / REWARD 0 / Q_MAX 1.767947e+12
Tally is -3
TIMESTEP 982 / EPSILON 0.9981

TIMESTEP 1075 / EPSILON 0.9979575000000009 / ACTION 0 / REWARD 0 / Q_MAX 1.310698e+11
Tally is -4
TIMESTEP 1076 / EPSILON 0.9979556000000009 / ACTION 2 / REWARD 0 / Q_MAX 1.908917e+11
Tally is -4
TIMESTEP 1077 / EPSILON 0.9979537000000009 / ACTION 2 / REWARD 0 / Q_MAX 1.233615e+11
Tally is -4
TIMESTEP 1078 / EPSILON 0.9979518000000009 / ACTION 0 / REWARD 0 / Q_MAX 5.795431e+11
Tally is -4
TIMESTEP 1079 / EPSILON 0.997949900000001 / ACTION 0 / REWARD 0 / Q_MAX 1.292951e+12
Tally is -4
TIMESTEP 1080 / EPSILON 0.997948000000001 / ACTION 1 / REWARD 0 / Q_MAX 1.386140e+12
Tally is -4
TIMESTEP 1081 / EPSILON 0.997946100000001 / ACTION 2 / REWARD 0 / Q_MAX 5.702163e+11
Tally is -4
TIMESTEP 1082 / EPSILON 0.997944200000001 / ACTION 1 / REWARD 0 / Q_MAX 9.268593e+11
Tally is -4
TIMESTEP 1083 / EPSILON 0.997942300000001 / ACTION 0 / REWARD 0 / Q_MAX -7.870517e+09
Tally is -4
TIMESTEP 1084 / EPSILON 0.997940400000001 / ACTION 1 / REWARD 0 / Q_MAX 2.007092e+11
Tally is -4
TIMESTEP 1085 / EPSILON 0

TIMESTEP 1170 / EPSILON 0.997777000000001 / ACTION 1 / REWARD 0 / Q_MAX 9.688776e+11
Tally is -4
TIMESTEP 1171 / EPSILON 0.997775100000001 / ACTION 1 / REWARD 0 / Q_MAX 7.346132e+11
Tally is -4
TIMESTEP 1172 / EPSILON 0.997773200000001 / ACTION 2 / REWARD 0 / Q_MAX 4.401969e+11
Tally is -4
TIMESTEP 1173 / EPSILON 0.997771300000001 / ACTION 2 / REWARD 0 / Q_MAX 1.140856e+12
Tally is -4
TIMESTEP 1174 / EPSILON 0.997769400000001 / ACTION 2 / REWARD 0 / Q_MAX 3.223673e+12
Tally is -4
TIMESTEP 1175 / EPSILON 0.997767500000001 / ACTION 2 / REWARD 0 / Q_MAX 2.914579e+12
Tally is -4
TIMESTEP 1176 / EPSILON 0.997765600000001 / ACTION 0 / REWARD 0 / Q_MAX 3.305624e+12
Tally is -4
TIMESTEP 1177 / EPSILON 0.997763700000001 / ACTION 1 / REWARD 0 / Q_MAX 1.796192e+12
Tally is -4
TIMESTEP 1178 / EPSILON 0.997761800000001 / ACTION 0 / REWARD 0 / Q_MAX 1.916838e+12
Tally is -4
TIMESTEP 1179 / EPSILON 0.997759900000001 / ACTION 0 / REWARD 0 / Q_MAX 1.305486e+12
Tally is -4
TIMESTEP 1180 / EPSILON 0.9977

TIMESTEP 1275 / EPSILON 0.9975775000000011 / ACTION 2 / REWARD 0 / Q_MAX 4.759319e+12
Tally is -4
TIMESTEP 1276 / EPSILON 0.9975756000000011 / ACTION 0 / REWARD 0 / Q_MAX 6.177585e+12
Tally is -4
TIMESTEP 1277 / EPSILON 0.9975737000000011 / ACTION 1 / REWARD 0 / Q_MAX 4.500377e+12
Tally is -4
TIMESTEP 1278 / EPSILON 0.9975718000000011 / ACTION 0 / REWARD 0 / Q_MAX 4.081463e+12
Tally is -4
TIMESTEP 1279 / EPSILON 0.9975699000000011 / ACTION 1 / REWARD 0 / Q_MAX 3.562895e+12
Tally is -4
TIMESTEP 1280 / EPSILON 0.9975680000000011 / ACTION 2 / REWARD 0 / Q_MAX 1.920474e+12
Tally is -4
TIMESTEP 1281 / EPSILON 0.9975661000000011 / ACTION 1 / REWARD 0 / Q_MAX 2.633610e+12
Tally is -4
TIMESTEP 1282 / EPSILON 0.9975642000000011 / ACTION 2 / REWARD 0 / Q_MAX 1.725268e+12
Tally is -4
TIMESTEP 1283 / EPSILON 0.9975623000000011 / ACTION 2 / REWARD 0 / Q_MAX 2.751692e+12
Tally is -4
TIMESTEP 1284 / EPSILON 0.9975604000000011 / ACTION 1 / REWARD 0 / Q_MAX 1.812409e+12
Tally is -4
TIMESTEP 1285 / EPSI

TIMESTEP 1388 / EPSILON 0.9973628000000012 / ACTION 2 / REWARD 0 / Q_MAX 7.251345e+11
Tally is -5
TIMESTEP 1389 / EPSILON 0.9973609000000012 / ACTION 0 / REWARD 0 / Q_MAX -1.960288e+11
Tally is -5
TIMESTEP 1390 / EPSILON 0.9973590000000012 / ACTION 0 / REWARD 0 / Q_MAX 4.813919e+11
Tally is -5
TIMESTEP 1391 / EPSILON 0.9973571000000012 / ACTION 1 / REWARD 0 / Q_MAX -2.393338e+11
Tally is -5
TIMESTEP 1392 / EPSILON 0.9973552000000012 / ACTION 0 / REWARD 0 / Q_MAX -1.088309e+11
Tally is -5
TIMESTEP 1393 / EPSILON 0.9973533000000012 / ACTION 0 / REWARD 0 / Q_MAX 7.882791e+11
Tally is -5
TIMESTEP 1394 / EPSILON 0.9973514000000012 / ACTION 0 / REWARD 0 / Q_MAX 1.032661e+12
Tally is -5
TIMESTEP 1395 / EPSILON 0.9973495000000012 / ACTION 0 / REWARD 0 / Q_MAX -4.608396e+11
Tally is -5
TIMESTEP 1396 / EPSILON 0.9973476000000012 / ACTION 1 / REWARD 0 / Q_MAX 1.917295e+12
Tally is -5
TIMESTEP 1397 / EPSILON 0.9973457000000012 / ACTION 2 / REWARD 0 / Q_MAX -4.274124e+11
Tally is -5
TIMESTEP 1398 /

Tally is -5
TIMESTEP 1495 / EPSILON 0.9971595000000013 / ACTION 0 / REWARD 0 / Q_MAX 6.168309e+11
Tally is -5
TIMESTEP 1496 / EPSILON 0.9971576000000013 / ACTION 1 / REWARD 0 / Q_MAX 9.683256e+11
Tally is -5
TIMESTEP 1497 / EPSILON 0.9971557000000013 / ACTION 1 / REWARD 0 / Q_MAX 4.634150e+11
Tally is -5
TIMESTEP 1498 / EPSILON 0.9971538000000013 / ACTION 0 / REWARD 0 / Q_MAX 8.391101e+10
Tally is -5
TIMESTEP 1499 / EPSILON 0.9971519000000013 / ACTION 2 / REWARD 0 / Q_MAX 1.410541e+12
Tally is -5
TIMESTEP 1500 / EPSILON 0.9971500000000013 / ACTION 1 / REWARD 0 / Q_MAX 5.141262e+11
Tally is -5
TIMESTEP 1501 / EPSILON 0.9971481000000013 / ACTION 1 / REWARD 0 / Q_MAX 1.415398e+11
Tally is -5
TIMESTEP 1502 / EPSILON 0.9971462000000013 / ACTION 0 / REWARD 0 / Q_MAX 1.289928e+12
Tally is -5
TIMESTEP 1503 / EPSILON 0.9971443000000013 / ACTION 2 / REWARD 0 / Q_MAX 1.988947e+11
Tally is -5
TIMESTEP 1504 / EPSILON 0.9971424000000013 / ACTION 0 / REWARD 0 / Q_MAX 3.110681e+11
Tally is -5
TIMESTEP

Tally is -6
TIMESTEP 1600 / EPSILON 0.9969600000000014 / ACTION 2 / REWARD 0 / Q_MAX 2.157611e+11
Tally is -6
TIMESTEP 1601 / EPSILON 0.9969581000000014 / ACTION 1 / REWARD 0 / Q_MAX -1.046249e+11
Tally is -6
TIMESTEP 1602 / EPSILON 0.9969562000000014 / ACTION 1 / REWARD 0 / Q_MAX 6.966335e+11
Tally is -6
TIMESTEP 1603 / EPSILON 0.9969543000000014 / ACTION 1 / REWARD 0 / Q_MAX 2.455289e+11
Tally is -6
TIMESTEP 1604 / EPSILON 0.9969524000000014 / ACTION 0 / REWARD 0 / Q_MAX -6.943450e+11
Tally is -6
TIMESTEP 1605 / EPSILON 0.9969505000000014 / ACTION 2 / REWARD 0 / Q_MAX -1.477960e+11
Tally is -6
TIMESTEP 1606 / EPSILON 0.9969486000000014 / ACTION 0 / REWARD 0 / Q_MAX 1.897844e+11
Tally is -6
TIMESTEP 1607 / EPSILON 0.9969467000000014 / ACTION 0 / REWARD 0 / Q_MAX 3.237064e+11
Tally is -6
TIMESTEP 1608 / EPSILON 0.9969448000000014 / ACTION 1 / REWARD 0 / Q_MAX -1.580193e+12
Tally is -6
TIMESTEP 1609 / EPSILON 0.9969429000000014 / ACTION 2 / REWARD 0 / Q_MAX -3.759251e+11
Tally is -6
TIM

Tally is -6
TIMESTEP 1708 / EPSILON 0.9967548000000015 / ACTION 1 / REWARD 0 / Q_MAX 1.275501e+12
Tally is -6
TIMESTEP 1709 / EPSILON 0.9967529000000015 / ACTION 1 / REWARD 0 / Q_MAX 1.022332e+12
Tally is -6
TIMESTEP 1710 / EPSILON 0.9967510000000015 / ACTION 1 / REWARD 0 / Q_MAX 7.959293e+11
Tally is -6
TIMESTEP 1711 / EPSILON 0.9967491000000015 / ACTION 0 / REWARD 0 / Q_MAX 6.757290e+11
Tally is -6
TIMESTEP 1712 / EPSILON 0.9967472000000015 / ACTION 0 / REWARD 0 / Q_MAX 4.770565e+11
Tally is -6
TIMESTEP 1713 / EPSILON 0.9967453000000015 / ACTION 2 / REWARD 0 / Q_MAX 9.954688e+11
Tally is -6
TIMESTEP 1714 / EPSILON 0.9967434000000015 / ACTION 2 / REWARD 0 / Q_MAX 1.859892e+12
Tally is -6
TIMESTEP 1715 / EPSILON 0.9967415000000015 / ACTION 1 / REWARD 0 / Q_MAX 1.692555e+12
Tally is -6
TIMESTEP 1716 / EPSILON 0.9967396000000015 / ACTION 1 / REWARD 0 / Q_MAX 1.349891e+12
Tally is -6
TIMESTEP 1717 / EPSILON 0.9967377000000015 / ACTION 0 / REWARD 0 / Q_MAX 2.127808e+12
Tally is -6
TIMESTEP

TIMESTEP 1819 / EPSILON 0.9965439000000016 / ACTION 0 / REWARD 0 / Q_MAX 3.202644e+12
Tally is -6
TIMESTEP 1820 / EPSILON 0.9965420000000016 / ACTION 2 / REWARD 0 / Q_MAX 2.354776e+12
Tally is -6
TIMESTEP 1821 / EPSILON 0.9965401000000016 / ACTION 1 / REWARD 0 / Q_MAX 1.555875e+12
Tally is -6
TIMESTEP 1822 / EPSILON 0.9965382000000016 / ACTION 0 / REWARD 0 / Q_MAX 2.242700e+12
Tally is -6
TIMESTEP 1823 / EPSILON 0.9965363000000016 / ACTION 0 / REWARD 0 / Q_MAX 3.545027e+12
Tally is -6
TIMESTEP 1824 / EPSILON 0.9965344000000016 / ACTION 2 / REWARD 0 / Q_MAX 2.089939e+12
Tally is -6
TIMESTEP 1825 / EPSILON 0.9965325000000016 / ACTION 0 / REWARD 0 / Q_MAX 1.748028e+12
Tally is -6
TIMESTEP 1826 / EPSILON 0.9965306000000016 / ACTION 1 / REWARD 0 / Q_MAX 1.312626e+12
Tally is -6
TIMESTEP 1827 / EPSILON 0.9965287000000016 / ACTION 0 / REWARD 0 / Q_MAX 9.020128e+11
Tally is -6
TIMESTEP 1828 / EPSILON 0.9965268000000016 / ACTION 1 / REWARD 0 / Q_MAX 1.091832e+12
Tally is -6
TIMESTEP 1829 / EPSI

TIMESTEP 1933 / EPSILON 0.9963273000000017 / ACTION 0 / REWARD 0 / Q_MAX 1.176527e+12
Tally is -7
TIMESTEP 1934 / EPSILON 0.9963254000000017 / ACTION 2 / REWARD 0 / Q_MAX 5.753466e+11
Tally is -7
TIMESTEP 1935 / EPSILON 0.9963235000000017 / ACTION 2 / REWARD 0 / Q_MAX 1.973977e+12
Tally is -7
TIMESTEP 1936 / EPSILON 0.9963216000000017 / ACTION 2 / REWARD 0 / Q_MAX 1.152330e+12
Tally is -7
TIMESTEP 1937 / EPSILON 0.9963197000000017 / ACTION 1 / REWARD 0 / Q_MAX 1.897372e+12
Tally is -7
TIMESTEP 1938 / EPSILON 0.9963178000000017 / ACTION 0 / REWARD 0 / Q_MAX 1.530682e+12
Tally is -7
TIMESTEP 1939 / EPSILON 0.9963159000000017 / ACTION 1 / REWARD 0 / Q_MAX 2.052744e+12
Tally is -7
TIMESTEP 1940 / EPSILON 0.9963140000000017 / ACTION 0 / REWARD 0 / Q_MAX 5.630395e+11
Tally is -7
TIMESTEP 1941 / EPSILON 0.9963121000000017 / ACTION 1 / REWARD 0 / Q_MAX 1.047788e+12
Tally is -7
TIMESTEP 1942 / EPSILON 0.9963102000000017 / ACTION 2 / REWARD 0 / Q_MAX 2.129915e+12
Tally is -7
TIMESTEP 1943 / EPSI

TIMESTEP 2046 / EPSILON 0.9961126000000018 / ACTION 2 / REWARD 0 / Q_MAX 2.174035e+12
Tally is -7
TIMESTEP 2047 / EPSILON 0.9961107000000018 / ACTION 0 / REWARD 0 / Q_MAX 6.895269e+11
Tally is -7
TIMESTEP 2048 / EPSILON 0.9961088000000018 / ACTION 1 / REWARD 0 / Q_MAX 1.291611e+12
Tally is -7
TIMESTEP 2049 / EPSILON 0.9961069000000018 / ACTION 0 / REWARD 0 / Q_MAX 1.519106e+12
Tally is -7
TIMESTEP 2050 / EPSILON 0.9961050000000018 / ACTION 2 / REWARD 0 / Q_MAX 2.192489e+12
Tally is -7
TIMESTEP 2051 / EPSILON 0.9961031000000018 / ACTION 1 / REWARD 0 / Q_MAX 1.693805e+12
Tally is -7
TIMESTEP 2052 / EPSILON 0.9961012000000018 / ACTION 1 / REWARD 0 / Q_MAX 2.082515e+12
Tally is -7
TIMESTEP 2053 / EPSILON 0.9960993000000018 / ACTION 1 / REWARD 0 / Q_MAX 1.850912e+12
Tally is -7
TIMESTEP 2054 / EPSILON 0.9960974000000018 / ACTION 2 / REWARD 0 / Q_MAX 2.004419e+12
Tally is -7
TIMESTEP 2055 / EPSILON 0.9960955000000018 / ACTION 0 / REWARD 0 / Q_MAX 2.036733e+12
Tally is -7
TIMESTEP 2056 / EPSI

TIMESTEP 2160 / EPSILON 0.9958960000000019 / ACTION 2 / REWARD 0 / Q_MAX 1.012493e+12
Tally is -8
TIMESTEP 2161 / EPSILON 0.9958941000000019 / ACTION 0 / REWARD 0 / Q_MAX 1.225476e+12
Tally is -8
TIMESTEP 2162 / EPSILON 0.9958922000000019 / ACTION 1 / REWARD 0 / Q_MAX 3.086641e+11
Tally is -8
TIMESTEP 2163 / EPSILON 0.9958903000000019 / ACTION 2 / REWARD 0 / Q_MAX 9.235397e+11
Tally is -8
TIMESTEP 2164 / EPSILON 0.9958884000000019 / ACTION 2 / REWARD 0 / Q_MAX 1.600046e+12
Tally is -8
TIMESTEP 2165 / EPSILON 0.9958865000000019 / ACTION 0 / REWARD 0 / Q_MAX 5.970507e+11
Tally is -8
TIMESTEP 2166 / EPSILON 0.9958846000000019 / ACTION 0 / REWARD 0 / Q_MAX 1.347081e+12
Tally is -8
TIMESTEP 2167 / EPSILON 0.9958827000000019 / ACTION 0 / REWARD 0 / Q_MAX 3.397305e+11
Tally is -8
TIMESTEP 2168 / EPSILON 0.9958808000000019 / ACTION 1 / REWARD 0 / Q_MAX 2.281794e+12
Tally is -8
TIMESTEP 2169 / EPSILON 0.9958789000000019 / ACTION 0 / REWARD 0 / Q_MAX 2.812235e+12
Tally is -8
TIMESTEP 2170 / EPSI

Tally is -8
TIMESTEP 2269 / EPSILON 0.995688900000002 / ACTION 2 / REWARD 0 / Q_MAX 1.710910e+12
Tally is -8
TIMESTEP 2270 / EPSILON 0.995687000000002 / ACTION 1 / REWARD 0 / Q_MAX 8.181765e+11
Tally is -8
TIMESTEP 2271 / EPSILON 0.995685100000002 / ACTION 1 / REWARD 0 / Q_MAX 1.724680e+12
Tally is -8
TIMESTEP 2272 / EPSILON 0.995683200000002 / ACTION 1 / REWARD 0 / Q_MAX 2.164033e+12
Tally is -8
TIMESTEP 2273 / EPSILON 0.995681300000002 / ACTION 0 / REWARD 0 / Q_MAX 2.478441e+12
Tally is -8
TIMESTEP 2274 / EPSILON 0.995679400000002 / ACTION 0 / REWARD 0 / Q_MAX 2.592709e+12
Tally is -8
TIMESTEP 2275 / EPSILON 0.995677500000002 / ACTION 2 / REWARD 0 / Q_MAX 3.035751e+12
Tally is -8
TIMESTEP 2276 / EPSILON 0.995675600000002 / ACTION 0 / REWARD 0 / Q_MAX 2.396902e+12
Tally is -8
TIMESTEP 2277 / EPSILON 0.995673700000002 / ACTION 1 / REWARD 0 / Q_MAX 2.362045e+12
Tally is -8
TIMESTEP 2278 / EPSILON 0.995671800000002 / ACTION 1 / REWARD 0 / Q_MAX 1.889650e+12
Tally is -8
TIMESTEP 2279 / EP

Tally is -9
TIMESTEP 2385 / EPSILON 0.9954685000000021 / ACTION 2 / REWARD 0 / Q_MAX 3.353218e+12
Tally is -9
TIMESTEP 2386 / EPSILON 0.9954666000000021 / ACTION 2 / REWARD 0 / Q_MAX 2.059952e+12
Tally is -9
TIMESTEP 2387 / EPSILON 0.9954647000000021 / ACTION 0 / REWARD 0 / Q_MAX 3.644218e+12
Tally is -9
TIMESTEP 2388 / EPSILON 0.9954628000000021 / ACTION 2 / REWARD 0 / Q_MAX 2.622647e+12
Tally is -9
TIMESTEP 2389 / EPSILON 0.9954609000000021 / ACTION 0 / REWARD 0 / Q_MAX 1.160789e+12
Tally is -9
TIMESTEP 2390 / EPSILON 0.9954590000000021 / ACTION 1 / REWARD 0 / Q_MAX 1.143174e+12
Tally is -9
TIMESTEP 2391 / EPSILON 0.9954571000000021 / ACTION 0 / REWARD 0 / Q_MAX 1.667086e+12
Tally is -9
TIMESTEP 2392 / EPSILON 0.9954552000000021 / ACTION 2 / REWARD 0 / Q_MAX 2.069093e+12
Tally is -9
TIMESTEP 2393 / EPSILON 0.9954533000000021 / ACTION 0 / REWARD 0 / Q_MAX 3.781264e+12
Tally is -9
TIMESTEP 2394 / EPSILON 0.9954514000000021 / ACTION 0 / REWARD 0 / Q_MAX 4.555757e+12
Tally is -9
TIMESTEP

Tally is -9
TIMESTEP 2498 / EPSILON 0.9952538000000022 / ACTION 1 / REWARD 0 / Q_MAX 1.837144e+10
Tally is -9
TIMESTEP 2499 / EPSILON 0.9952519000000022 / ACTION 0 / REWARD 0 / Q_MAX 1.817730e+12
Tally is -9
TIMESTEP 2500 / EPSILON 0.9952500000000022 / ACTION 1 / REWARD 0 / Q_MAX 2.363147e+12
Tally is -9
TIMESTEP 2501 / EPSILON 0.9952481000000022 / ACTION 1 / REWARD 0 / Q_MAX 2.463471e+12
Tally is -9
TIMESTEP 2502 / EPSILON 0.9952462000000022 / ACTION 0 / REWARD 0 / Q_MAX 4.299648e+12
Tally is -9
TIMESTEP 2503 / EPSILON 0.9952443000000022 / ACTION 0 / REWARD 0 / Q_MAX 2.742459e+12
Tally is -9
TIMESTEP 2504 / EPSILON 0.9952424000000022 / ACTION 2 / REWARD 0 / Q_MAX 1.340067e+12
Tally is -9
TIMESTEP 2505 / EPSILON 0.9952405000000022 / ACTION 1 / REWARD 0 / Q_MAX 1.397718e+12
Tally is -9
TIMESTEP 2506 / EPSILON 0.9952386000000022 / ACTION 0 / REWARD 0 / Q_MAX 7.209576e+11
Tally is -9
TIMESTEP 2507 / EPSILON 0.9952367000000022 / ACTION 2 / REWARD 0 / Q_MAX 1.222115e+10
Tally is -9
TIMESTEP

Tally is -10
TIMESTEP 2609 / EPSILON 0.9950429000000023 / ACTION 2 / REWARD 0 / Q_MAX 3.399566e+12
Tally is -10
TIMESTEP 2610 / EPSILON 0.9950410000000023 / ACTION 2 / REWARD 0 / Q_MAX 4.570225e+12
Tally is -10
TIMESTEP 2611 / EPSILON 0.9950391000000023 / ACTION 2 / REWARD 0 / Q_MAX 4.260058e+12
Tally is -10
TIMESTEP 2612 / EPSILON 0.9950372000000023 / ACTION 1 / REWARD 0 / Q_MAX 3.721543e+12
Tally is -10
TIMESTEP 2613 / EPSILON 0.9950353000000023 / ACTION 0 / REWARD 0 / Q_MAX 4.001412e+12
Tally is -10
TIMESTEP 2614 / EPSILON 0.9950334000000023 / ACTION 0 / REWARD 0 / Q_MAX 4.547141e+12
Tally is -10
TIMESTEP 2615 / EPSILON 0.9950315000000023 / ACTION 1 / REWARD 0 / Q_MAX 4.047246e+12
Tally is -10
TIMESTEP 2616 / EPSILON 0.9950296000000023 / ACTION 0 / REWARD 0 / Q_MAX 4.855319e+12
Tally is -10
TIMESTEP 2617 / EPSILON 0.9950277000000023 / ACTION 2 / REWARD 0 / Q_MAX 6.110328e+12
Tally is -10
TIMESTEP 2618 / EPSILON 0.9950258000000023 / ACTION 1 / REWARD 0 / Q_MAX 7.027011e+12
Tally is -

Tally is -10
TIMESTEP 2722 / EPSILON 0.9948282000000024 / ACTION 2 / REWARD 0 / Q_MAX 3.197568e+12
Tally is -10
TIMESTEP 2723 / EPSILON 0.9948263000000024 / ACTION 0 / REWARD 0 / Q_MAX 2.675984e+12
Tally is -10
TIMESTEP 2724 / EPSILON 0.9948244000000024 / ACTION 1 / REWARD 0 / Q_MAX 2.900100e+12
Tally is -10
TIMESTEP 2725 / EPSILON 0.9948225000000024 / ACTION 2 / REWARD 0 / Q_MAX 2.880317e+12
Tally is -10
TIMESTEP 2726 / EPSILON 0.9948206000000024 / ACTION 1 / REWARD 0 / Q_MAX 3.102842e+12
Tally is -10
TIMESTEP 2727 / EPSILON 0.9948187000000024 / ACTION 1 / REWARD 0 / Q_MAX 3.438905e+12
Tally is -10
TIMESTEP 2728 / EPSILON 0.9948168000000024 / ACTION 1 / REWARD 0 / Q_MAX 3.744741e+12
Tally is -10
TIMESTEP 2729 / EPSILON 0.9948149000000024 / ACTION 2 / REWARD 0 / Q_MAX 4.134216e+12
Tally is -10
TIMESTEP 2730 / EPSILON 0.9948130000000024 / ACTION 0 / REWARD 0 / Q_MAX 3.846385e+12
Tally is -10
TIMESTEP 2731 / EPSILON 0.9948111000000024 / ACTION 2 / REWARD 0 / Q_MAX 3.331347e+12
Tally is -

Tally is -10
TIMESTEP 2823 / EPSILON 0.9946363000000025 / ACTION 0 / REWARD 0 / Q_MAX 2.412756e+12
Tally is -10
TIMESTEP 2824 / EPSILON 0.9946344000000025 / ACTION 0 / REWARD 0 / Q_MAX 1.027461e+12
Tally is -10
TIMESTEP 2825 / EPSILON 0.9946325000000025 / ACTION 1 / REWARD 0 / Q_MAX 1.668773e+12
Tally is -10
TIMESTEP 2826 / EPSILON 0.9946306000000025 / ACTION 0 / REWARD 0 / Q_MAX 1.824567e+12
Tally is -10
TIMESTEP 2827 / EPSILON 0.9946287000000025 / ACTION 0 / REWARD 0 / Q_MAX 2.354722e+12
Tally is -10
TIMESTEP 2828 / EPSILON 0.9946268000000025 / ACTION 1 / REWARD 0 / Q_MAX 3.756097e+12
Tally is -10
TIMESTEP 2829 / EPSILON 0.9946249000000025 / ACTION 1 / REWARD 0 / Q_MAX 1.318010e+12
Tally is -10
TIMESTEP 2830 / EPSILON 0.9946230000000025 / ACTION 1 / REWARD 0 / Q_MAX 1.774323e+12
Tally is -10
TIMESTEP 2831 / EPSILON 0.9946211000000025 / ACTION 0 / REWARD 0 / Q_MAX 2.486785e+12
Tally is -10
TIMESTEP 2832 / EPSILON 0.9946192000000025 / ACTION 0 / REWARD 0 / Q_MAX 2.057262e+12
Tally is -

TIMESTEP 2928 / EPSILON 0.9944368000000026 / ACTION 1 / REWARD 0 / Q_MAX 1.778989e+12
Tally is -11
TIMESTEP 2929 / EPSILON 0.9944349000000026 / ACTION 0 / REWARD 0 / Q_MAX 2.477112e+12
Tally is -11
TIMESTEP 2930 / EPSILON 0.9944330000000026 / ACTION 2 / REWARD 0 / Q_MAX 2.688828e+12
Tally is -11
TIMESTEP 2931 / EPSILON 0.9944311000000026 / ACTION 1 / REWARD 0 / Q_MAX 1.647332e+12
Tally is -11
TIMESTEP 2932 / EPSILON 0.9944292000000026 / ACTION 0 / REWARD 0 / Q_MAX -5.000758e+11
Tally is -11
TIMESTEP 2933 / EPSILON 0.9944273000000026 / ACTION 1 / REWARD 0 / Q_MAX 5.529200e+10
Tally is -11
TIMESTEP 2934 / EPSILON 0.9944254000000026 / ACTION 1 / REWARD 0 / Q_MAX 1.981404e+12
Tally is -11
TIMESTEP 2935 / EPSILON 0.9944235000000026 / ACTION 0 / REWARD 0 / Q_MAX -1.169052e+12
Tally is -11
TIMESTEP 2936 / EPSILON 0.9944216000000026 / ACTION 0 / REWARD 0 / Q_MAX -1.331506e+12
Tally is -11
TIMESTEP 2937 / EPSILON 0.9944197000000026 / ACTION 1 / REWARD 0 / Q_MAX -1.419178e+12
Tally is -11
TIMEST

TIMESTEP 3044 / EPSILON 0.9942164000000027 / ACTION 0 / REWARD 0 / Q_MAX 2.218447e+12
Tally is -11
TIMESTEP 3045 / EPSILON 0.9942145000000027 / ACTION 0 / REWARD 0 / Q_MAX 1.764314e+12
Tally is -11
TIMESTEP 3046 / EPSILON 0.9942126000000027 / ACTION 0 / REWARD 0 / Q_MAX 2.333183e+12
Tally is -11
TIMESTEP 3047 / EPSILON 0.9942107000000027 / ACTION 0 / REWARD 0 / Q_MAX 1.541258e+12
Tally is -11
TIMESTEP 3048 / EPSILON 0.9942088000000027 / ACTION 2 / REWARD 0 / Q_MAX 1.945871e+12
Tally is -11
TIMESTEP 3049 / EPSILON 0.9942069000000027 / ACTION 0 / REWARD 0 / Q_MAX 1.755622e+12
Tally is -11
TIMESTEP 3050 / EPSILON 0.9942050000000027 / ACTION 2 / REWARD 0 / Q_MAX 1.700740e+12
Tally is -11
TIMESTEP 3051 / EPSILON 0.9942031000000027 / ACTION 1 / REWARD 0 / Q_MAX 1.816965e+12
Tally is -11
TIMESTEP 3052 / EPSILON 0.9942012000000027 / ACTION 2 / REWARD 0 / Q_MAX 1.633075e+12
Tally is -11
TIMESTEP 3053 / EPSILON 0.9941993000000027 / ACTION 2 / REWARD 0 / Q_MAX 2.077096e+12
Tally is -11
TIMESTEP 3

Tally is -12
TIMESTEP 3163 / EPSILON 0.9939903000000028 / ACTION 2 / REWARD 0 / Q_MAX 1.631382e+11
Tally is -12
TIMESTEP 3164 / EPSILON 0.9939884000000028 / ACTION 1 / REWARD 0 / Q_MAX 2.841048e+12
Tally is -12
TIMESTEP 3165 / EPSILON 0.9939865000000028 / ACTION 2 / REWARD 0 / Q_MAX 2.799631e+12
Tally is -12
TIMESTEP 3166 / EPSILON 0.9939846000000028 / ACTION 2 / REWARD 0 / Q_MAX 4.093826e+12
Tally is -12
TIMESTEP 3167 / EPSILON 0.9939827000000028 / ACTION 0 / REWARD 0 / Q_MAX 2.508814e+12
Tally is -12
TIMESTEP 3168 / EPSILON 0.9939808000000028 / ACTION 1 / REWARD 0 / Q_MAX 1.076357e+12
Tally is -12
TIMESTEP 3169 / EPSILON 0.9939789000000028 / ACTION 0 / REWARD 0 / Q_MAX 1.363834e+11
Tally is -12
TIMESTEP 3170 / EPSILON 0.9939770000000028 / ACTION 2 / REWARD 0 / Q_MAX -9.883655e+11
Tally is -12
TIMESTEP 3171 / EPSILON 0.9939751000000028 / ACTION 1 / REWARD 0 / Q_MAX -4.241304e+11
Tally is -12
TIMESTEP 3172 / EPSILON 0.9939732000000028 / ACTION 2 / REWARD 0 / Q_MAX 6.736097e+11
Tally is

Tally is -12
TIMESTEP 3280 / EPSILON 0.9937680000000029 / ACTION 1 / REWARD 0 / Q_MAX 1.882745e+12
Tally is -12
TIMESTEP 3281 / EPSILON 0.9937661000000029 / ACTION 1 / REWARD 0 / Q_MAX 2.428302e+12
Tally is -12
TIMESTEP 3282 / EPSILON 0.9937642000000029 / ACTION 1 / REWARD 0 / Q_MAX 2.886053e+12
Tally is -12
TIMESTEP 3283 / EPSILON 0.9937623000000029 / ACTION 0 / REWARD 0 / Q_MAX 3.745454e+12
Tally is -12
TIMESTEP 3284 / EPSILON 0.9937604000000029 / ACTION 0 / REWARD 0 / Q_MAX 3.756334e+12
Tally is -12
TIMESTEP 3285 / EPSILON 0.9937585000000029 / ACTION 1 / REWARD 0 / Q_MAX 4.084044e+12
Tally is -12
TIMESTEP 3286 / EPSILON 0.9937566000000029 / ACTION 1 / REWARD 0 / Q_MAX 3.043862e+12
Tally is -12
TIMESTEP 3287 / EPSILON 0.9937547000000029 / ACTION 0 / REWARD 0 / Q_MAX 9.236051e+11
Tally is -12
TIMESTEP 3288 / EPSILON 0.9937528000000029 / ACTION 2 / REWARD 0 / Q_MAX 3.482201e+12
Tally is -12
TIMESTEP 3289 / EPSILON 0.9937509000000029 / ACTION 2 / REWARD 0 / Q_MAX 2.636522e+12
Tally is -

Tally is -13
TIMESTEP 3372 / EPSILON 0.993593200000003 / ACTION 2 / REWARD 0 / Q_MAX 3.468094e+12
Tally is -13
TIMESTEP 3373 / EPSILON 0.993591300000003 / ACTION 2 / REWARD 0 / Q_MAX 5.445322e+12
Tally is -13
TIMESTEP 3374 / EPSILON 0.993589400000003 / ACTION 2 / REWARD 0 / Q_MAX 4.392075e+12
Tally is -13
TIMESTEP 3375 / EPSILON 0.993587500000003 / ACTION 1 / REWARD 0 / Q_MAX 4.795119e+12
Tally is -13
TIMESTEP 3376 / EPSILON 0.993585600000003 / ACTION 0 / REWARD 0 / Q_MAX 4.542641e+12
Tally is -13
TIMESTEP 3377 / EPSILON 0.993583700000003 / ACTION 2 / REWARD 0 / Q_MAX 4.856096e+12
Tally is -13
TIMESTEP 3378 / EPSILON 0.993581800000003 / ACTION 1 / REWARD 0 / Q_MAX 4.330888e+12
Tally is -13
TIMESTEP 3379 / EPSILON 0.993579900000003 / ACTION 1 / REWARD 0 / Q_MAX 5.580391e+12
Tally is -13
TIMESTEP 3380 / EPSILON 0.993578000000003 / ACTION 2 / REWARD 0 / Q_MAX 6.565641e+12
Tally is -13
TIMESTEP 3381 / EPSILON 0.993576100000003 / ACTION 0 / REWARD 0 / Q_MAX 6.199902e+12
Tally is -13
TIMESTE

Tally is -13
TIMESTEP 3461 / EPSILON 0.993424100000003 / ACTION 2 / REWARD 0 / Q_MAX 3.145083e+12
Tally is -13
TIMESTEP 3462 / EPSILON 0.993422200000003 / ACTION 0 / REWARD 0 / Q_MAX 2.456897e+12
Tally is -13
TIMESTEP 3463 / EPSILON 0.993420300000003 / ACTION 0 / REWARD 0 / Q_MAX 1.614883e+12
Tally is -13
TIMESTEP 3464 / EPSILON 0.993418400000003 / ACTION 2 / REWARD 0 / Q_MAX 2.282484e+12
Tally is -13
TIMESTEP 3465 / EPSILON 0.993416500000003 / ACTION 1 / REWARD 0 / Q_MAX 1.996323e+12
Tally is -13
TIMESTEP 3466 / EPSILON 0.993414600000003 / ACTION 2 / REWARD 0 / Q_MAX 1.474038e+12
Tally is -13
TIMESTEP 3467 / EPSILON 0.993412700000003 / ACTION 1 / REWARD 0 / Q_MAX 1.610942e+12
Tally is -13
TIMESTEP 3468 / EPSILON 0.993410800000003 / ACTION 1 / REWARD 0 / Q_MAX 3.114319e+12
Tally is -13
TIMESTEP 3469 / EPSILON 0.993408900000003 / ACTION 2 / REWARD 0 / Q_MAX 3.018108e+12
Tally is -13
TIMESTEP 3470 / EPSILON 0.993407000000003 / ACTION 1 / REWARD 0 / Q_MAX 3.180467e+12
Tally is -13
TIMESTE

Tally is -13
TIMESTEP 3579 / EPSILON 0.9931999000000031 / ACTION 1 / REWARD 0 / Q_MAX 1.946269e+12
Tally is -13
TIMESTEP 3580 / EPSILON 0.9931980000000031 / ACTION 0 / REWARD 0 / Q_MAX 4.509203e+12
Tally is -13
TIMESTEP 3581 / EPSILON 0.9931961000000031 / ACTION 0 / REWARD 0 / Q_MAX 5.068699e+12
Tally is -13
TIMESTEP 3582 / EPSILON 0.9931942000000031 / ACTION 1 / REWARD 0 / Q_MAX 4.469490e+12
Tally is -13
TIMESTEP 3583 / EPSILON 0.9931923000000031 / ACTION 1 / REWARD 0 / Q_MAX 4.143529e+12
Tally is -13
TIMESTEP 3584 / EPSILON 0.9931904000000031 / ACTION 0 / REWARD 0 / Q_MAX 5.157864e+12
Tally is -13
TIMESTEP 3585 / EPSILON 0.9931885000000031 / ACTION 0 / REWARD 0 / Q_MAX 3.775601e+12
Tally is -13
TIMESTEP 3586 / EPSILON 0.9931866000000031 / ACTION 2 / REWARD 0 / Q_MAX 2.333650e+12
Tally is -13
TIMESTEP 3587 / EPSILON 0.9931847000000031 / ACTION 1 / REWARD 0 / Q_MAX 2.507499e+12
Tally is -13
TIMESTEP 3588 / EPSILON 0.9931828000000031 / ACTION 2 / REWARD 0 / Q_MAX 2.473456e+12
Tally is -

Tally is -14
TIMESTEP 3700 / EPSILON 0.9929700000000032 / ACTION 0 / REWARD 0 / Q_MAX 2.008862e+12
Tally is -14
TIMESTEP 3701 / EPSILON 0.9929681000000032 / ACTION 1 / REWARD 0 / Q_MAX 3.722757e+12
Tally is -14
TIMESTEP 3702 / EPSILON 0.9929662000000032 / ACTION 0 / REWARD 0 / Q_MAX 1.936286e+12
Tally is -14
TIMESTEP 3703 / EPSILON 0.9929643000000032 / ACTION 1 / REWARD 0 / Q_MAX 3.156659e+12
Tally is -14
TIMESTEP 3704 / EPSILON 0.9929624000000032 / ACTION 0 / REWARD 0 / Q_MAX 4.743230e+12
Tally is -14
TIMESTEP 3705 / EPSILON 0.9929605000000032 / ACTION 0 / REWARD 0 / Q_MAX 3.656123e+12
Tally is -14
TIMESTEP 3706 / EPSILON 0.9929586000000032 / ACTION 1 / REWARD 0 / Q_MAX 2.633512e+12
Tally is -14
TIMESTEP 3707 / EPSILON 0.9929567000000032 / ACTION 2 / REWARD 0 / Q_MAX 2.933531e+12
Tally is -14
TIMESTEP 3708 / EPSILON 0.9929548000000032 / ACTION 0 / REWARD 0 / Q_MAX 4.638591e+12
Tally is -14
TIMESTEP 3709 / EPSILON 0.9929529000000032 / ACTION 1 / REWARD 0 / Q_MAX 2.371930e+12
Tally is -

TIMESTEP 3810 / EPSILON 0.9927610000000033 / ACTION 1 / REWARD 0 / Q_MAX 2.820160e+12
Tally is -14
TIMESTEP 3811 / EPSILON 0.9927591000000033 / ACTION 1 / REWARD 0 / Q_MAX 3.332860e+12
Tally is -14
TIMESTEP 3812 / EPSILON 0.9927572000000033 / ACTION 2 / REWARD 0 / Q_MAX 1.613552e+12
Tally is -14
TIMESTEP 3813 / EPSILON 0.9927553000000033 / ACTION 1 / REWARD 0 / Q_MAX 1.271367e+12
Tally is -14
TIMESTEP 3814 / EPSILON 0.9927534000000033 / ACTION 0 / REWARD 0 / Q_MAX 3.259744e+12
Tally is -14
TIMESTEP 3815 / EPSILON 0.9927515000000033 / ACTION 1 / REWARD 0 / Q_MAX 2.558856e+12
Tally is -14
TIMESTEP 3816 / EPSILON 0.9927496000000033 / ACTION 2 / REWARD 0 / Q_MAX 3.396093e+12
Tally is -14
TIMESTEP 3817 / EPSILON 0.9927477000000033 / ACTION 2 / REWARD 0 / Q_MAX 2.109720e+12
Tally is -14
TIMESTEP 3818 / EPSILON 0.9927458000000033 / ACTION 0 / REWARD 0 / Q_MAX 6.770366e+11
Tally is -14
TIMESTEP 3819 / EPSILON 0.9927439000000033 / ACTION 2 / REWARD 0 / Q_MAX 2.101549e+12
Tally is -14
TIMESTEP 3

TIMESTEP 3911 / EPSILON 0.9925691000000034 / ACTION 2 / REWARD 0 / Q_MAX 3.869407e+12
Tally is -15
TIMESTEP 3912 / EPSILON 0.9925672000000034 / ACTION 2 / REWARD 0 / Q_MAX 6.021934e+12
Tally is -15
TIMESTEP 3913 / EPSILON 0.9925653000000034 / ACTION 2 / REWARD 0 / Q_MAX 5.726825e+12
Tally is -15
TIMESTEP 3914 / EPSILON 0.9925634000000034 / ACTION 0 / REWARD 0 / Q_MAX 5.291156e+12
Tally is -15
TIMESTEP 3915 / EPSILON 0.9925615000000034 / ACTION 2 / REWARD 0 / Q_MAX 3.394270e+12
Tally is -15
TIMESTEP 3916 / EPSILON 0.9925596000000034 / ACTION 1 / REWARD 0 / Q_MAX 2.389002e+12
Tally is -15
TIMESTEP 3917 / EPSILON 0.9925577000000034 / ACTION 1 / REWARD 0 / Q_MAX 2.434130e+12
Tally is -15
TIMESTEP 3918 / EPSILON 0.9925558000000034 / ACTION 0 / REWARD 0 / Q_MAX 4.453590e+12
Tally is -15
TIMESTEP 3919 / EPSILON 0.9925539000000034 / ACTION 1 / REWARD 0 / Q_MAX 1.624252e+12
Tally is -15
TIMESTEP 3920 / EPSILON 0.9925520000000034 / ACTION 1 / REWARD 0 / Q_MAX 1.466238e+12
Tally is -15
TIMESTEP 3

Tally is -15
TIMESTEP 4023 / EPSILON 0.9923563000000035 / ACTION 2 / REWARD 0 / Q_MAX 8.158379e+11
Tally is -15
TIMESTEP 4024 / EPSILON 0.9923544000000035 / ACTION 0 / REWARD 0 / Q_MAX 1.122123e+12
Tally is -15
TIMESTEP 4025 / EPSILON 0.9923525000000035 / ACTION 1 / REWARD 0 / Q_MAX 1.427674e+12
Tally is -15
TIMESTEP 4026 / EPSILON 0.9923506000000035 / ACTION 0 / REWARD 0 / Q_MAX 2.229453e+12
Tally is -15
TIMESTEP 4027 / EPSILON 0.9923487000000035 / ACTION 1 / REWARD 0 / Q_MAX 2.326245e+12
Tally is -15
TIMESTEP 4028 / EPSILON 0.9923468000000035 / ACTION 0 / REWARD 0 / Q_MAX 2.586371e+12
Tally is -15
TIMESTEP 4029 / EPSILON 0.9923449000000035 / ACTION 0 / REWARD 0 / Q_MAX 1.817893e+12
Tally is -15
TIMESTEP 4030 / EPSILON 0.9923430000000035 / ACTION 1 / REWARD 0 / Q_MAX 2.403604e+12
Tally is -15
TIMESTEP 4031 / EPSILON 0.9923411000000035 / ACTION 0 / REWARD 0 / Q_MAX 2.713124e+12
Tally is -15
TIMESTEP 4032 / EPSILON 0.9923392000000035 / ACTION 0 / REWARD 0 / Q_MAX 2.499056e+12
Tally is -

TIMESTEP 4118 / EPSILON 0.9921758000000036 / ACTION 1 / REWARD 0 / Q_MAX 7.892538e+11
Tally is -15
TIMESTEP 4119 / EPSILON 0.9921739000000036 / ACTION 0 / REWARD 0 / Q_MAX 1.072690e+12
Tally is -15
TIMESTEP 4120 / EPSILON 0.9921720000000036 / ACTION 2 / REWARD 0 / Q_MAX -3.766285e+11
Tally is -15
TIMESTEP 4121 / EPSILON 0.9921701000000036 / ACTION 0 / REWARD 0 / Q_MAX -9.368782e+10
Tally is -15
TIMESTEP 4122 / EPSILON 0.9921682000000036 / ACTION 0 / REWARD 0 / Q_MAX 6.649327e+11
Tally is -15
TIMESTEP 4123 / EPSILON 0.9921663000000036 / ACTION 2 / REWARD 0 / Q_MAX 1.639640e+12
Tally is -15
TIMESTEP 4124 / EPSILON 0.9921644000000036 / ACTION 0 / REWARD 0 / Q_MAX 6.088069e+11
Tally is -15
TIMESTEP 4125 / EPSILON 0.9921625000000036 / ACTION 1 / REWARD 0 / Q_MAX 1.060191e+12
Tally is -15
TIMESTEP 4126 / EPSILON 0.9921606000000036 / ACTION 2 / REWARD 0 / Q_MAX 2.489009e+12
Tally is -15
TIMESTEP 4127 / EPSILON 0.9921587000000036 / ACTION 1 / REWARD 0 / Q_MAX 6.886244e+11
Tally is -15
TIMESTEP

TIMESTEP 4223 / EPSILON 0.9919763000000037 / ACTION 1 / REWARD 0 / Q_MAX 1.064412e+12
Tally is -15
TIMESTEP 4224 / EPSILON 0.9919744000000037 / ACTION 1 / REWARD 0 / Q_MAX 1.254005e+12
Tally is -15
TIMESTEP 4225 / EPSILON 0.9919725000000037 / ACTION 2 / REWARD 0 / Q_MAX 1.446612e+12
Tally is -15
TIMESTEP 4226 / EPSILON 0.9919706000000037 / ACTION 2 / REWARD 0 / Q_MAX 1.486202e+12
Tally is -15
TIMESTEP 4227 / EPSILON 0.9919687000000037 / ACTION 2 / REWARD 0 / Q_MAX 1.872746e+12
Tally is -15
TIMESTEP 4228 / EPSILON 0.9919668000000037 / ACTION 1 / REWARD 0 / Q_MAX 1.550979e+12
Tally is -15
TIMESTEP 4229 / EPSILON 0.9919649000000037 / ACTION 0 / REWARD 0 / Q_MAX 1.550545e+12
Tally is -15
TIMESTEP 4230 / EPSILON 0.9919630000000037 / ACTION 1 / REWARD 0 / Q_MAX 2.154015e+12
Tally is -15
TIMESTEP 4231 / EPSILON 0.9919611000000037 / ACTION 2 / REWARD 0 / Q_MAX 1.439866e+12
Tally is -15
TIMESTEP 4232 / EPSILON 0.9919592000000037 / ACTION 1 / REWARD 0 / Q_MAX 1.778972e+12
Tally is -15
TIMESTEP 4

TIMESTEP 4333 / EPSILON 0.9917673000000038 / ACTION 1 / REWARD 0 / Q_MAX -1.185602e+12
Tally is -15
TIMESTEP 4334 / EPSILON 0.9917654000000038 / ACTION 0 / REWARD 0 / Q_MAX -2.838118e+11
Tally is -15
TIMESTEP 4335 / EPSILON 0.9917635000000038 / ACTION 0 / REWARD 0 / Q_MAX -1.019802e+12
Tally is -15
TIMESTEP 4336 / EPSILON 0.9917616000000038 / ACTION 2 / REWARD 0 / Q_MAX 3.675407e+11
Tally is -15
TIMESTEP 4337 / EPSILON 0.9917597000000038 / ACTION 1 / REWARD 0 / Q_MAX 2.329926e+11
Tally is -15
TIMESTEP 4338 / EPSILON 0.9917578000000038 / ACTION 0 / REWARD 0 / Q_MAX -3.635294e+11
Tally is -15
TIMESTEP 4339 / EPSILON 0.9917559000000038 / ACTION 2 / REWARD 0 / Q_MAX 7.905025e+11
Tally is -15
TIMESTEP 4340 / EPSILON 0.9917540000000038 / ACTION 0 / REWARD 0 / Q_MAX -5.908067e+10
Tally is -15
TIMESTEP 4341 / EPSILON 0.9917521000000038 / ACTION 2 / REWARD 0 / Q_MAX -6.259681e+11
Tally is -15
TIMESTEP 4342 / EPSILON 0.9917502000000038 / ACTION 2 / REWARD 0 / Q_MAX -1.845121e+12
Tally is -15
TIM

Tally is -15
TIMESTEP 4449 / EPSILON 0.9915469000000039 / ACTION 2 / REWARD 0 / Q_MAX 1.620567e+12
Tally is -15
TIMESTEP 4450 / EPSILON 0.9915450000000039 / ACTION 2 / REWARD 0 / Q_MAX 8.854529e+11
Tally is -15
TIMESTEP 4451 / EPSILON 0.9915431000000039 / ACTION 2 / REWARD 0 / Q_MAX 1.219188e+12
Tally is -15
TIMESTEP 4452 / EPSILON 0.9915412000000039 / ACTION 2 / REWARD 0 / Q_MAX 9.165151e+11
Tally is -15
TIMESTEP 4453 / EPSILON 0.9915393000000039 / ACTION 1 / REWARD 0 / Q_MAX 2.567216e+12
Tally is -15
TIMESTEP 4454 / EPSILON 0.9915374000000039 / ACTION 2 / REWARD 0 / Q_MAX 2.600414e+12
Tally is -15
TIMESTEP 4455 / EPSILON 0.9915355000000039 / ACTION 2 / REWARD 0 / Q_MAX 7.752504e+11
Tally is -15
TIMESTEP 4456 / EPSILON 0.9915336000000039 / ACTION 1 / REWARD 0 / Q_MAX 1.889069e+12
Tally is -15
TIMESTEP 4457 / EPSILON 0.9915317000000039 / ACTION 0 / REWARD 0 / Q_MAX 5.263852e+10
Tally is -15
TIMESTEP 4458 / EPSILON 0.9915298000000039 / ACTION 1 / REWARD 0 / Q_MAX 4.567811e+11
Tally is -

TIMESTEP 4541 / EPSILON 0.991372100000004 / ACTION 2 / REWARD 0 / Q_MAX 7.797096e+11
Tally is -15
TIMESTEP 4542 / EPSILON 0.991370200000004 / ACTION 0 / REWARD 0 / Q_MAX 2.085977e+11
Tally is -15
TIMESTEP 4543 / EPSILON 0.991368300000004 / ACTION 0 / REWARD 0 / Q_MAX 1.185722e+12
Tally is -15
TIMESTEP 4544 / EPSILON 0.991366400000004 / ACTION 2 / REWARD 0 / Q_MAX 1.805332e+12
Tally is -15
TIMESTEP 4545 / EPSILON 0.991364500000004 / ACTION 2 / REWARD 0 / Q_MAX 1.655149e+12
Tally is -15
TIMESTEP 4546 / EPSILON 0.991362600000004 / ACTION 2 / REWARD 0 / Q_MAX 5.429312e+11
Tally is -15
TIMESTEP 4547 / EPSILON 0.991360700000004 / ACTION 0 / REWARD 0 / Q_MAX 2.134326e+12
Tally is -15
TIMESTEP 4548 / EPSILON 0.991358800000004 / ACTION 1 / REWARD 0 / Q_MAX 2.261238e+12
Tally is -15
TIMESTEP 4549 / EPSILON 0.991356900000004 / ACTION 0 / REWARD 0 / Q_MAX 2.360605e+12
Tally is -15
TIMESTEP 4550 / EPSILON 0.991355000000004 / ACTION 0 / REWARD 0 / Q_MAX 2.740916e+12
Tally is -15
TIMESTEP 4551 / EPSI

TIMESTEP 4656 / EPSILON 0.9911536000000041 / ACTION 1 / REWARD 0 / Q_MAX 3.857184e+11
Tally is -16
TIMESTEP 4657 / EPSILON 0.9911517000000041 / ACTION 0 / REWARD 0 / Q_MAX -3.195219e+11
Tally is -16
TIMESTEP 4658 / EPSILON 0.9911498000000041 / ACTION 2 / REWARD 0 / Q_MAX -7.181683e+11
Tally is -16
TIMESTEP 4659 / EPSILON 0.9911479000000041 / ACTION 1 / REWARD 0 / Q_MAX 9.446221e+11
Tally is -16
TIMESTEP 4660 / EPSILON 0.9911460000000041 / ACTION 0 / REWARD 0 / Q_MAX -7.269663e+11
Tally is -16
TIMESTEP 4661 / EPSILON 0.9911441000000041 / ACTION 0 / REWARD 0 / Q_MAX 8.524484e+11
Tally is -16
TIMESTEP 4662 / EPSILON 0.9911422000000041 / ACTION 1 / REWARD 0 / Q_MAX 1.353253e+12
Tally is -16
TIMESTEP 4663 / EPSILON 0.9911403000000041 / ACTION 0 / REWARD 0 / Q_MAX 2.131921e+12
Tally is -16
TIMESTEP 4664 / EPSILON 0.9911384000000041 / ACTION 2 / REWARD 0 / Q_MAX 1.154660e+12
Tally is -16
TIMESTEP 4665 / EPSILON 0.9911365000000041 / ACTION 2 / REWARD 0 / Q_MAX 1.251144e+12
Tally is -16
TIMESTE

Tally is -16
TIMESTEP 4775 / EPSILON 0.9909275000000042 / ACTION 2 / REWARD 0 / Q_MAX 5.290376e+11
Tally is -16
TIMESTEP 4776 / EPSILON 0.9909256000000042 / ACTION 0 / REWARD 0 / Q_MAX 7.336627e+10
Tally is -16
TIMESTEP 4777 / EPSILON 0.9909237000000042 / ACTION 0 / REWARD 0 / Q_MAX 2.145359e+12
Tally is -16
TIMESTEP 4778 / EPSILON 0.9909218000000042 / ACTION 2 / REWARD 0 / Q_MAX 2.044096e+12
Tally is -16
TIMESTEP 4779 / EPSILON 0.9909199000000042 / ACTION 2 / REWARD 0 / Q_MAX 1.304068e+12
Tally is -16
TIMESTEP 4780 / EPSILON 0.9909180000000042 / ACTION 0 / REWARD 0 / Q_MAX 2.857036e+12
Tally is -16
TIMESTEP 4781 / EPSILON 0.9909161000000042 / ACTION 2 / REWARD 0 / Q_MAX 2.573801e+12
Tally is -16
TIMESTEP 4782 / EPSILON 0.9909142000000042 / ACTION 1 / REWARD 0 / Q_MAX 1.840385e+12
Tally is -16
TIMESTEP 4783 / EPSILON 0.9909123000000042 / ACTION 1 / REWARD 0 / Q_MAX 2.357383e+12
Tally is -16
TIMESTEP 4784 / EPSILON 0.9909104000000042 / ACTION 1 / REWARD 0 / Q_MAX 3.429545e+12
Tally is -

Tally is -17
TIMESTEP 4889 / EPSILON 0.9907109000000043 / ACTION 2 / REWARD 0 / Q_MAX -1.182541e+12
Tally is -17
TIMESTEP 4890 / EPSILON 0.9907090000000043 / ACTION 2 / REWARD 0 / Q_MAX 3.955766e+11
Tally is -17
TIMESTEP 4891 / EPSILON 0.9907071000000043 / ACTION 2 / REWARD 0 / Q_MAX -2.106839e+12
Tally is -17
TIMESTEP 4892 / EPSILON 0.9907052000000043 / ACTION 2 / REWARD 0 / Q_MAX -1.756888e+12
Tally is -17
TIMESTEP 4893 / EPSILON 0.9907033000000043 / ACTION 2 / REWARD 0 / Q_MAX -8.417805e+11
Tally is -17
TIMESTEP 4894 / EPSILON 0.9907014000000043 / ACTION 1 / REWARD 0 / Q_MAX 5.514862e+11
Tally is -17
TIMESTEP 4895 / EPSILON 0.9906995000000043 / ACTION 0 / REWARD 0 / Q_MAX 9.200244e+11
Tally is -17
TIMESTEP 4896 / EPSILON 0.9906976000000043 / ACTION 1 / REWARD 0 / Q_MAX 2.687614e+12
Tally is -17
TIMESTEP 4897 / EPSILON 0.9906957000000043 / ACTION 0 / REWARD 0 / Q_MAX 4.677442e+11
Tally is -17
TIMESTEP 4898 / EPSILON 0.9906938000000043 / ACTION 2 / REWARD 0 / Q_MAX 2.094149e+11
Tally 

TIMESTEP 4995 / EPSILON 0.9905095000000044 / ACTION 0 / REWARD 0 / Q_MAX 5.773270e+11
Tally is -17
TIMESTEP 4996 / EPSILON 0.9905076000000044 / ACTION 0 / REWARD 0 / Q_MAX 9.468920e+11
Tally is -17
TIMESTEP 4997 / EPSILON 0.9905057000000044 / ACTION 1 / REWARD 0 / Q_MAX 6.415440e+11
Tally is -17
TIMESTEP 4998 / EPSILON 0.9905038000000044 / ACTION 0 / REWARD 0 / Q_MAX 1.139673e+12
Tally is -17
TIMESTEP 4999 / EPSILON 0.9905019000000044 / ACTION 0 / REWARD 0 / Q_MAX 7.346434e+11
Tally is -17
TIMESTEP 5000 / EPSILON 0.9905000000000044 / ACTION 1 / REWARD 0 / Q_MAX 6.887007e+11
Tally is -17
TIMESTEP 5001 / EPSILON 0.9904981000000044 / ACTION 1 / REWARD 0 / Q_MAX 5.197069e+11
Tally is -17
TIMESTEP 5002 / EPSILON 0.9904962000000044 / ACTION 2 / REWARD 0 / Q_MAX 3.691300e+11
Tally is -17
TIMESTEP 5003 / EPSILON 0.9904943000000044 / ACTION 0 / REWARD 0 / Q_MAX 2.962280e+11
Tally is -17
TIMESTEP 5004 / EPSILON 0.9904924000000044 / ACTION 1 / REWARD 0 / Q_MAX 1.689229e+11
Tally is -17
TIMESTEP 5

Tally is -17
TIMESTEP 5107 / EPSILON 0.9902967000000045 / ACTION 2 / REWARD 0 / Q_MAX -8.320089e+11
Tally is -17
TIMESTEP 5108 / EPSILON 0.9902948000000045 / ACTION 0 / REWARD 0 / Q_MAX 1.135685e+12
Tally is -17
TIMESTEP 5109 / EPSILON 0.9902929000000045 / ACTION 2 / REWARD 0 / Q_MAX 2.441467e+12
Tally is -17
TIMESTEP 5110 / EPSILON 0.9902910000000045 / ACTION 0 / REWARD 0 / Q_MAX 1.420827e+12
Tally is -17
TIMESTEP 5111 / EPSILON 0.9902891000000045 / ACTION 2 / REWARD 0 / Q_MAX 2.675198e+11
Tally is -17
TIMESTEP 5112 / EPSILON 0.9902872000000045 / ACTION 1 / REWARD 0 / Q_MAX 1.220872e+12
Tally is -17
TIMESTEP 5113 / EPSILON 0.9902853000000045 / ACTION 0 / REWARD 0 / Q_MAX -4.214694e+11
Tally is -17
TIMESTEP 5114 / EPSILON 0.9902834000000045 / ACTION 0 / REWARD 0 / Q_MAX 5.670251e+11
Tally is -17
TIMESTEP 5115 / EPSILON 0.9902815000000045 / ACTION 1 / REWARD 0 / Q_MAX 9.330763e+11
Tally is -17
TIMESTEP 5116 / EPSILON 0.9902796000000045 / ACTION 1 / REWARD 0 / Q_MAX 3.959061e+11
Tally is

Tally is -17
TIMESTEP 5206 / EPSILON 0.9901086000000046 / ACTION 0 / REWARD 0 / Q_MAX 6.045697e+11
Tally is -17
TIMESTEP 5207 / EPSILON 0.9901067000000046 / ACTION 0 / REWARD 0 / Q_MAX 5.480283e+11
Tally is -17
TIMESTEP 5208 / EPSILON 0.9901048000000046 / ACTION 0 / REWARD 0 / Q_MAX 7.952232e+11
Tally is -17
TIMESTEP 5209 / EPSILON 0.9901029000000046 / ACTION 0 / REWARD 0 / Q_MAX 7.692252e+11
Tally is -17
TIMESTEP 5210 / EPSILON 0.9901010000000046 / ACTION 0 / REWARD 0 / Q_MAX 1.281972e+12
Tally is -17
TIMESTEP 5211 / EPSILON 0.9900991000000046 / ACTION 0 / REWARD 0 / Q_MAX 7.807904e+11
Tally is -17
TIMESTEP 5212 / EPSILON 0.9900972000000046 / ACTION 0 / REWARD 0 / Q_MAX 5.471901e+11
Tally is -17
TIMESTEP 5213 / EPSILON 0.9900953000000046 / ACTION 2 / REWARD 0 / Q_MAX 1.202812e+12
Tally is -17
TIMESTEP 5214 / EPSILON 0.9900934000000046 / ACTION 0 / REWARD 0 / Q_MAX 1.901783e+12
Tally is -17
TIMESTEP 5215 / EPSILON 0.9900915000000046 / ACTION 0 / REWARD 0 / Q_MAX 1.538396e+12
Tally is -

Tally is -17
TIMESTEP 5309 / EPSILON 0.9899129000000046 / ACTION 0 / REWARD 0 / Q_MAX 4.286513e+12
Tally is -17
TIMESTEP 5310 / EPSILON 0.9899110000000046 / ACTION 1 / REWARD 0 / Q_MAX 4.663134e+12
Tally is -17
TIMESTEP 5311 / EPSILON 0.9899091000000046 / ACTION 1 / REWARD 0 / Q_MAX 2.110073e+12
Tally is -17
TIMESTEP 5312 / EPSILON 0.9899072000000047 / ACTION 0 / REWARD 0 / Q_MAX 6.763885e+11
Tally is -17
TIMESTEP 5313 / EPSILON 0.9899053000000047 / ACTION 0 / REWARD 0 / Q_MAX 1.137467e+12
Tally is -17
TIMESTEP 5314 / EPSILON 0.9899034000000047 / ACTION 1 / REWARD 0 / Q_MAX 2.130766e+12
Tally is -17
TIMESTEP 5315 / EPSILON 0.9899015000000047 / ACTION 0 / REWARD 0 / Q_MAX 1.248490e+12
Tally is -17
TIMESTEP 5316 / EPSILON 0.9898996000000047 / ACTION 0 / REWARD 0 / Q_MAX 3.210213e+12
Tally is -17
TIMESTEP 5317 / EPSILON 0.9898977000000047 / ACTION 2 / REWARD 0 / Q_MAX 3.109675e+12
Tally is -17
TIMESTEP 5318 / EPSILON 0.9898958000000047 / ACTION 1 / REWARD 0 / Q_MAX 2.217754e+12
Tally is -

TIMESTEP 5424 / EPSILON 0.9896944000000047 / ACTION 1 / REWARD 0 / Q_MAX 1.721453e+12
Tally is -18
TIMESTEP 5425 / EPSILON 0.9896925000000047 / ACTION 2 / REWARD 0 / Q_MAX 1.933396e+12
Tally is -18
TIMESTEP 5426 / EPSILON 0.9896906000000047 / ACTION 2 / REWARD 0 / Q_MAX 1.521372e+12
Tally is -18
TIMESTEP 5427 / EPSILON 0.9896887000000048 / ACTION 1 / REWARD 0 / Q_MAX 2.086291e+12
Tally is -18
TIMESTEP 5428 / EPSILON 0.9896868000000048 / ACTION 0 / REWARD 0 / Q_MAX 1.070739e+12
Tally is -18
TIMESTEP 5429 / EPSILON 0.9896849000000048 / ACTION 1 / REWARD 0 / Q_MAX 5.702730e+11
Tally is -18
TIMESTEP 5430 / EPSILON 0.9896830000000048 / ACTION 0 / REWARD 0 / Q_MAX 9.956763e+11
Tally is -18
TIMESTEP 5431 / EPSILON 0.9896811000000048 / ACTION 0 / REWARD 0 / Q_MAX 1.318551e+12
Tally is -18
TIMESTEP 5432 / EPSILON 0.9896792000000048 / ACTION 2 / REWARD 0 / Q_MAX 6.631840e+11
Tally is -18
TIMESTEP 5433 / EPSILON 0.9896773000000048 / ACTION 1 / REWARD 0 / Q_MAX 3.581605e+11
Tally is -18
TIMESTEP 5

TIMESTEP 5532 / EPSILON 0.9894892000000048 / ACTION 2 / REWARD 0 / Q_MAX 3.455840e+12
Tally is -18
TIMESTEP 5533 / EPSILON 0.9894873000000048 / ACTION 2 / REWARD 0 / Q_MAX 4.898797e+12
Tally is -18
TIMESTEP 5534 / EPSILON 0.9894854000000048 / ACTION 0 / REWARD 0 / Q_MAX 4.737806e+12
Tally is -18
TIMESTEP 5535 / EPSILON 0.9894835000000048 / ACTION 1 / REWARD 0 / Q_MAX 3.227987e+12
Tally is -18
TIMESTEP 5536 / EPSILON 0.9894816000000048 / ACTION 0 / REWARD 0 / Q_MAX 3.331376e+12
Tally is -18
TIMESTEP 5537 / EPSILON 0.9894797000000048 / ACTION 2 / REWARD 0 / Q_MAX 3.920367e+12
Tally is -18
TIMESTEP 5538 / EPSILON 0.9894778000000048 / ACTION 1 / REWARD 0 / Q_MAX 2.594372e+12
Tally is -18
TIMESTEP 5539 / EPSILON 0.9894759000000048 / ACTION 2 / REWARD 0 / Q_MAX 2.613556e+12
Tally is -18
TIMESTEP 5540 / EPSILON 0.9894740000000048 / ACTION 1 / REWARD 0 / Q_MAX 3.780715e+12
Tally is -18
TIMESTEP 5541 / EPSILON 0.9894721000000049 / ACTION 1 / REWARD 0 / Q_MAX 4.087485e+12
Tally is -18
TIMESTEP 5

Tally is -19
TIMESTEP 5624 / EPSILON 0.9893144000000049 / ACTION 2 / REWARD 0 / Q_MAX 3.833378e+10
Tally is -19
TIMESTEP 5625 / EPSILON 0.9893125000000049 / ACTION 1 / REWARD 0 / Q_MAX -7.850637e+11
Tally is -19
TIMESTEP 5626 / EPSILON 0.9893106000000049 / ACTION 2 / REWARD 0 / Q_MAX -7.841298e+11
Tally is -19
TIMESTEP 5627 / EPSILON 0.9893087000000049 / ACTION 0 / REWARD 0 / Q_MAX 8.125055e+09
Tally is -19
TIMESTEP 5628 / EPSILON 0.9893068000000049 / ACTION 1 / REWARD 0 / Q_MAX -1.370689e+12
Tally is -19
TIMESTEP 5629 / EPSILON 0.9893049000000049 / ACTION 0 / REWARD 0 / Q_MAX 1.076375e+12
Tally is -19
TIMESTEP 5630 / EPSILON 0.9893030000000049 / ACTION 2 / REWARD 0 / Q_MAX 2.128081e+11
Tally is -19
TIMESTEP 5631 / EPSILON 0.9893011000000049 / ACTION 0 / REWARD 0 / Q_MAX 1.086056e+12
Tally is -19
TIMESTEP 5632 / EPSILON 0.9892992000000049 / ACTION 2 / REWARD 0 / Q_MAX 1.545786e+12
Tally is -19
TIMESTEP 5633 / EPSILON 0.9892973000000049 / ACTION 0 / REWARD 0 / Q_MAX 7.882475e+11
Tally i

Tally is -19
TIMESTEP 5732 / EPSILON 0.989109200000005 / ACTION 1 / REWARD 0 / Q_MAX 2.706892e+12
Tally is -19
TIMESTEP 5733 / EPSILON 0.989107300000005 / ACTION 2 / REWARD 0 / Q_MAX 3.122051e+12
Tally is -19
TIMESTEP 5734 / EPSILON 0.989105400000005 / ACTION 2 / REWARD 0 / Q_MAX 3.330383e+12
Tally is -19
TIMESTEP 5735 / EPSILON 0.989103500000005 / ACTION 1 / REWARD 0 / Q_MAX 2.918912e+12
Tally is -19
TIMESTEP 5736 / EPSILON 0.989101600000005 / ACTION 1 / REWARD 0 / Q_MAX 3.623350e+12
Tally is -19
TIMESTEP 5737 / EPSILON 0.989099700000005 / ACTION 2 / REWARD 0 / Q_MAX 3.834970e+12
Tally is -19
TIMESTEP 5738 / EPSILON 0.989097800000005 / ACTION 2 / REWARD 0 / Q_MAX 4.102027e+12
Tally is -19
TIMESTEP 5739 / EPSILON 0.989095900000005 / ACTION 0 / REWARD 0 / Q_MAX 2.497889e+12
Tally is -19
TIMESTEP 5740 / EPSILON 0.989094000000005 / ACTION 0 / REWARD 0 / Q_MAX 2.138034e+12
Tally is -19
TIMESTEP 5741 / EPSILON 0.989092100000005 / ACTION 1 / REWARD 0 / Q_MAX 3.807664e+12
Tally is -19
TIMESTE

Tally is -19
TIMESTEP 5842 / EPSILON 0.9889002000000051 / ACTION 0 / REWARD 0 / Q_MAX 5.212622e+12
Tally is -19
TIMESTEP 5843 / EPSILON 0.9888983000000051 / ACTION 0 / REWARD 0 / Q_MAX 3.819975e+12
Tally is -19
TIMESTEP 5844 / EPSILON 0.9888964000000051 / ACTION 2 / REWARD 0 / Q_MAX 3.693190e+12
Tally is -19
TIMESTEP 5845 / EPSILON 0.9888945000000051 / ACTION 1 / REWARD 0 / Q_MAX 4.077127e+12
Tally is -19
TIMESTEP 5846 / EPSILON 0.9888926000000051 / ACTION 2 / REWARD 0 / Q_MAX 2.797780e+12
Tally is -19
TIMESTEP 5847 / EPSILON 0.9888907000000051 / ACTION 0 / REWARD 0 / Q_MAX 2.553197e+12
Tally is -19
TIMESTEP 5848 / EPSILON 0.9888888000000051 / ACTION 1 / REWARD 0 / Q_MAX 4.101818e+12
Tally is -19
TIMESTEP 5849 / EPSILON 0.9888869000000051 / ACTION 0 / REWARD 0 / Q_MAX 4.347669e+12
Tally is -19
TIMESTEP 5850 / EPSILON 0.9888850000000051 / ACTION 2 / REWARD 0 / Q_MAX 3.880830e+12
Tally is -19
TIMESTEP 5851 / EPSILON 0.9888831000000051 / ACTION 0 / REWARD 0 / Q_MAX 3.977568e+12
Tally is -

TIMESTEP 5960 / EPSILON 0.9886760000000052 / ACTION 1 / REWARD 0 / Q_MAX 1.254144e+12
Tally is -19
TIMESTEP 5961 / EPSILON 0.9886741000000052 / ACTION 2 / REWARD 0 / Q_MAX 1.478720e+12
Tally is -19
TIMESTEP 5962 / EPSILON 0.9886722000000052 / ACTION 1 / REWARD 0 / Q_MAX 1.947677e+12
Tally is -19
TIMESTEP 5963 / EPSILON 0.9886703000000052 / ACTION 1 / REWARD 0 / Q_MAX 2.079713e+12
Tally is -19
TIMESTEP 5964 / EPSILON 0.9886684000000052 / ACTION 2 / REWARD 0 / Q_MAX 1.461038e+12
Tally is -19
TIMESTEP 5965 / EPSILON 0.9886665000000052 / ACTION 0 / REWARD 0 / Q_MAX 2.251203e+12
Tally is -19
TIMESTEP 5966 / EPSILON 0.9886646000000052 / ACTION 0 / REWARD 0 / Q_MAX 1.338908e+12
Tally is -19
TIMESTEP 5967 / EPSILON 0.9886627000000052 / ACTION 2 / REWARD 0 / Q_MAX 9.101651e+11
Tally is -19
TIMESTEP 5968 / EPSILON 0.9886608000000052 / ACTION 0 / REWARD 0 / Q_MAX 8.815109e+11
Tally is -19
TIMESTEP 5969 / EPSILON 0.9886589000000052 / ACTION 1 / REWARD 0 / Q_MAX 1.354125e+12
Tally is -19
TIMESTEP 5

TIMESTEP 6076 / EPSILON 0.9884556000000053 / ACTION 0 / REWARD 0 / Q_MAX 3.860699e+12
Tally is -19
TIMESTEP 6077 / EPSILON 0.9884537000000053 / ACTION 2 / REWARD 0 / Q_MAX 2.148660e+12
Tally is -19
TIMESTEP 6078 / EPSILON 0.9884518000000053 / ACTION 2 / REWARD 0 / Q_MAX 2.840635e+12
Tally is -19
TIMESTEP 6079 / EPSILON 0.9884499000000053 / ACTION 0 / REWARD 0 / Q_MAX 1.787199e+11
Tally is -19
TIMESTEP 6080 / EPSILON 0.9884480000000053 / ACTION 2 / REWARD 0 / Q_MAX 1.041671e+12
Tally is -19
TIMESTEP 6081 / EPSILON 0.9884461000000053 / ACTION 0 / REWARD 0 / Q_MAX 3.447573e+12
Tally is -19
TIMESTEP 6082 / EPSILON 0.9884442000000053 / ACTION 1 / REWARD 0 / Q_MAX 3.999907e+12
Tally is -19
TIMESTEP 6083 / EPSILON 0.9884423000000053 / ACTION 0 / REWARD 0 / Q_MAX 3.175429e+12
Tally is -19
TIMESTEP 6084 / EPSILON 0.9884404000000053 / ACTION 2 / REWARD 0 / Q_MAX 2.446046e+12
Tally is -19
TIMESTEP 6085 / EPSILON 0.9884385000000053 / ACTION 1 / REWARD 0 / Q_MAX 2.987429e+12
Tally is -19
TIMESTEP 6

TIMESTEP 6194 / EPSILON 0.9882314000000054 / ACTION 1 / REWARD 0 / Q_MAX -2.772098e+11
Tally is -19
TIMESTEP 6195 / EPSILON 0.9882295000000054 / ACTION 1 / REWARD 0 / Q_MAX 1.033190e+11
Tally is -19
TIMESTEP 6196 / EPSILON 0.9882276000000054 / ACTION 1 / REWARD 0 / Q_MAX -2.307174e+11
Tally is -19
TIMESTEP 6197 / EPSILON 0.9882257000000054 / ACTION 2 / REWARD 0 / Q_MAX 8.747771e+11
Tally is -19
TIMESTEP 6198 / EPSILON 0.9882238000000054 / ACTION 1 / REWARD 0 / Q_MAX 2.641183e+11
Tally is -19
TIMESTEP 6199 / EPSILON 0.9882219000000054 / ACTION 0 / REWARD 0 / Q_MAX 8.638552e+10
Tally is -19
TIMESTEP 6200 / EPSILON 0.9882200000000054 / ACTION 0 / REWARD 0 / Q_MAX 3.869147e+11
Tally is -19
TIMESTEP 6201 / EPSILON 0.9882181000000054 / ACTION 0 / REWARD 0 / Q_MAX 1.417500e+11
Tally is -19
TIMESTEP 6202 / EPSILON 0.9882162000000054 / ACTION 0 / REWARD 0 / Q_MAX 1.886863e+11
Tally is -19
TIMESTEP 6203 / EPSILON 0.9882143000000054 / ACTION 2 / REWARD 0 / Q_MAX 7.306125e+11
Tally is -19
TIMESTEP

Tally is -19
TIMESTEP 6311 / EPSILON 0.9880091000000055 / ACTION 1 / REWARD 0 / Q_MAX 1.503076e+12
Tally is -19
TIMESTEP 6312 / EPSILON 0.9880072000000055 / ACTION 1 / REWARD 0 / Q_MAX 1.908439e+12
Tally is -19
TIMESTEP 6313 / EPSILON 0.9880053000000055 / ACTION 2 / REWARD 0 / Q_MAX 3.315970e+12
Tally is -19
TIMESTEP 6314 / EPSILON 0.9880034000000055 / ACTION 1 / REWARD 0 / Q_MAX 2.723228e+12
Tally is -19
TIMESTEP 6315 / EPSILON 0.9880015000000055 / ACTION 1 / REWARD 0 / Q_MAX 4.122166e+12
Tally is -19
TIMESTEP 6316 / EPSILON 0.9879996000000055 / ACTION 0 / REWARD 0 / Q_MAX 4.058460e+12
Tally is -19
TIMESTEP 6317 / EPSILON 0.9879977000000055 / ACTION 1 / REWARD 0 / Q_MAX 3.171775e+12
Tally is -19
TIMESTEP 6318 / EPSILON 0.9879958000000055 / ACTION 0 / REWARD 0 / Q_MAX 1.140644e+12
Tally is -19
TIMESTEP 6319 / EPSILON 0.9879939000000055 / ACTION 0 / REWARD 0 / Q_MAX 3.469345e+12
Tally is -19
TIMESTEP 6320 / EPSILON 0.9879920000000055 / ACTION 1 / REWARD 0 / Q_MAX 1.580543e+12
Tally is -

Tally is -20
TIMESTEP 6425 / EPSILON 0.9877925000000056 / ACTION 0 / REWARD 0 / Q_MAX 2.842363e+11
Tally is -20
TIMESTEP 6426 / EPSILON 0.9877906000000056 / ACTION 2 / REWARD 0 / Q_MAX -9.801968e+11
Tally is -20
TIMESTEP 6427 / EPSILON 0.9877887000000056 / ACTION 1 / REWARD 0 / Q_MAX 2.337593e+12
Tally is -20
TIMESTEP 6428 / EPSILON 0.9877868000000056 / ACTION 1 / REWARD 0 / Q_MAX -1.644338e+12
Tally is -20
TIMESTEP 6429 / EPSILON 0.9877849000000056 / ACTION 0 / REWARD 0 / Q_MAX 5.249650e+11
Tally is -20
TIMESTEP 6430 / EPSILON 0.9877830000000056 / ACTION 1 / REWARD 0 / Q_MAX 1.217939e+12
Tally is -20
TIMESTEP 6431 / EPSILON 0.9877811000000056 / ACTION 2 / REWARD 0 / Q_MAX -2.526987e+11
Tally is -20
TIMESTEP 6432 / EPSILON 0.9877792000000056 / ACTION 1 / REWARD 0 / Q_MAX 1.260247e+12
Tally is -20
TIMESTEP 6433 / EPSILON 0.9877773000000056 / ACTION 2 / REWARD 0 / Q_MAX 2.618397e+12
Tally is -20
TIMESTEP 6434 / EPSILON 0.9877754000000056 / ACTION 0 / REWARD 0 / Q_MAX 2.360836e+12
Tally i

Tally is -20
TIMESTEP 6531 / EPSILON 0.9875911000000057 / ACTION 2 / REWARD 0 / Q_MAX 2.092521e+12
Tally is -20
TIMESTEP 6532 / EPSILON 0.9875892000000057 / ACTION 2 / REWARD 0 / Q_MAX 2.182000e+12
Tally is -20
TIMESTEP 6533 / EPSILON 0.9875873000000057 / ACTION 2 / REWARD 0 / Q_MAX 2.977273e+12
Tally is -20
TIMESTEP 6534 / EPSILON 0.9875854000000057 / ACTION 2 / REWARD 0 / Q_MAX 4.614497e+11
Tally is -20
TIMESTEP 6535 / EPSILON 0.9875835000000057 / ACTION 1 / REWARD 0 / Q_MAX 1.026513e+12
Tally is -20
TIMESTEP 6536 / EPSILON 0.9875816000000057 / ACTION 0 / REWARD 0 / Q_MAX 3.000572e+12
Tally is -20
TIMESTEP 6537 / EPSILON 0.9875797000000057 / ACTION 2 / REWARD 0 / Q_MAX 2.104782e+12
Tally is -20
TIMESTEP 6538 / EPSILON 0.9875778000000057 / ACTION 0 / REWARD 0 / Q_MAX 2.539759e+12
Tally is -20
TIMESTEP 6539 / EPSILON 0.9875759000000057 / ACTION 0 / REWARD 0 / Q_MAX 1.207160e+12
Tally is -20
TIMESTEP 6540 / EPSILON 0.9875740000000057 / ACTION 2 / REWARD 0 / Q_MAX 1.168750e+12
Tally is -

TIMESTEP 6627 / EPSILON 0.9874087000000058 / ACTION 2 / REWARD 0 / Q_MAX 1.310438e+12
Tally is -21
TIMESTEP 6628 / EPSILON 0.9874068000000058 / ACTION 0 / REWARD 0 / Q_MAX -6.546001e+10
Tally is -21
TIMESTEP 6629 / EPSILON 0.9874049000000058 / ACTION 2 / REWARD 0 / Q_MAX 1.989190e+12
Tally is -21
TIMESTEP 6630 / EPSILON 0.9874030000000058 / ACTION 2 / REWARD 0 / Q_MAX 3.506679e+11
Tally is -21
TIMESTEP 6631 / EPSILON 0.9874011000000058 / ACTION 0 / REWARD 0 / Q_MAX 2.517945e+12
Tally is -21
TIMESTEP 6632 / EPSILON 0.9873992000000058 / ACTION 2 / REWARD 0 / Q_MAX 2.843681e+12
Tally is -21
TIMESTEP 6633 / EPSILON 0.9873973000000058 / ACTION 2 / REWARD 0 / Q_MAX 2.994799e+12
Tally is -21
TIMESTEP 6634 / EPSILON 0.9873954000000058 / ACTION 2 / REWARD 0 / Q_MAX 2.997185e+12
Tally is -21
TIMESTEP 6635 / EPSILON 0.9873935000000058 / ACTION 0 / REWARD 0 / Q_MAX 3.562669e+12
Tally is -21
TIMESTEP 6636 / EPSILON 0.9873916000000058 / ACTION 0 / REWARD 0 / Q_MAX 2.996679e+12
Tally is -21
TIMESTEP 

Tally is -21
TIMESTEP 6718 / EPSILON 0.9872358000000059 / ACTION 2 / REWARD 0 / Q_MAX 1.046883e+12
Tally is -21
TIMESTEP 6719 / EPSILON 0.9872339000000059 / ACTION 0 / REWARD 0 / Q_MAX 4.320781e+11
Tally is -21
TIMESTEP 6720 / EPSILON 0.9872320000000059 / ACTION 0 / REWARD 0 / Q_MAX 6.400846e+11
Tally is -21
TIMESTEP 6721 / EPSILON 0.9872301000000059 / ACTION 0 / REWARD 0 / Q_MAX 4.960096e+11
Tally is -21
TIMESTEP 6722 / EPSILON 0.9872282000000059 / ACTION 0 / REWARD 0 / Q_MAX 1.385354e+12
Tally is -21
TIMESTEP 6723 / EPSILON 0.9872263000000059 / ACTION 1 / REWARD 0 / Q_MAX 1.474972e+12
Tally is -21
TIMESTEP 6724 / EPSILON 0.9872244000000059 / ACTION 1 / REWARD 0 / Q_MAX 1.242654e+12
Tally is -21
TIMESTEP 6725 / EPSILON 0.9872225000000059 / ACTION 2 / REWARD 0 / Q_MAX -8.591622e+10
Tally is -21
TIMESTEP 6726 / EPSILON 0.9872206000000059 / ACTION 1 / REWARD 0 / Q_MAX 5.332980e+11
Tally is -21
TIMESTEP 6727 / EPSILON 0.9872187000000059 / ACTION 1 / REWARD 0 / Q_MAX 3.552975e+11
Tally is 

Tally is -21
TIMESTEP 6821 / EPSILON 0.987040100000006 / ACTION 1 / REWARD 0 / Q_MAX 2.105188e+12
Tally is -21
TIMESTEP 6822 / EPSILON 0.987038200000006 / ACTION 0 / REWARD 0 / Q_MAX 4.902919e+12
Tally is -21
TIMESTEP 6823 / EPSILON 0.987036300000006 / ACTION 0 / REWARD 0 / Q_MAX 3.446486e+12
Tally is -21
TIMESTEP 6824 / EPSILON 0.987034400000006 / ACTION 2 / REWARD 0 / Q_MAX 4.099061e+12
Tally is -21
TIMESTEP 6825 / EPSILON 0.987032500000006 / ACTION 2 / REWARD 0 / Q_MAX 3.453756e+12
Tally is -21
TIMESTEP 6826 / EPSILON 0.987030600000006 / ACTION 1 / REWARD 0 / Q_MAX 2.828383e+12
Tally is -21
TIMESTEP 6827 / EPSILON 0.987028700000006 / ACTION 1 / REWARD 0 / Q_MAX 1.640182e+12
Tally is -21
TIMESTEP 6828 / EPSILON 0.987026800000006 / ACTION 0 / REWARD 0 / Q_MAX 9.703079e+11
Tally is -21
TIMESTEP 6829 / EPSILON 0.987024900000006 / ACTION 0 / REWARD 0 / Q_MAX 2.128729e+12
Tally is -21
TIMESTEP 6830 / EPSILON 0.987023000000006 / ACTION 0 / REWARD 0 / Q_MAX 1.188325e+12
Tally is -21
TIMESTE

Tally is -21
TIMESTEP 6933 / EPSILON 0.9868273000000061 / ACTION 0 / REWARD 0 / Q_MAX 1.912626e+11
Tally is -21
TIMESTEP 6934 / EPSILON 0.9868254000000061 / ACTION 1 / REWARD 0 / Q_MAX 8.692824e+11
Tally is -21
TIMESTEP 6935 / EPSILON 0.9868235000000061 / ACTION 2 / REWARD 0 / Q_MAX 2.584801e+11
Tally is -21
TIMESTEP 6936 / EPSILON 0.9868216000000061 / ACTION 1 / REWARD 0 / Q_MAX 1.348397e+11
Tally is -21
TIMESTEP 6937 / EPSILON 0.9868197000000061 / ACTION 1 / REWARD 0 / Q_MAX -3.985419e+11
Tally is -21
TIMESTEP 6938 / EPSILON 0.9868178000000061 / ACTION 1 / REWARD 0 / Q_MAX 7.674658e+11
Tally is -21
TIMESTEP 6939 / EPSILON 0.9868159000000061 / ACTION 0 / REWARD 0 / Q_MAX 6.288960e+11
Tally is -21
TIMESTEP 6940 / EPSILON 0.9868140000000061 / ACTION 0 / REWARD 0 / Q_MAX 1.932338e+11
Tally is -21
TIMESTEP 6941 / EPSILON 0.9868121000000061 / ACTION 2 / REWARD 0 / Q_MAX -1.738229e+11
Tally is -21
TIMESTEP 6942 / EPSILON 0.9868102000000061 / ACTION 0 / REWARD 0 / Q_MAX 7.068738e+11
Tally is

Tally is -21
TIMESTEP 7046 / EPSILON 0.9866126000000062 / ACTION 0 / REWARD 0 / Q_MAX 3.466913e+12
Tally is -21
TIMESTEP 7047 / EPSILON 0.9866107000000062 / ACTION 1 / REWARD 0 / Q_MAX 2.314891e+12
Tally is -21
TIMESTEP 7048 / EPSILON 0.9866088000000062 / ACTION 2 / REWARD 0 / Q_MAX 3.122247e+12
Tally is -21
TIMESTEP 7049 / EPSILON 0.9866069000000062 / ACTION 0 / REWARD 0 / Q_MAX 2.055296e+12
Tally is -21
TIMESTEP 7050 / EPSILON 0.9866050000000062 / ACTION 1 / REWARD 0 / Q_MAX 2.545949e+12
Tally is -21
TIMESTEP 7051 / EPSILON 0.9866031000000062 / ACTION 0 / REWARD 0 / Q_MAX 1.362141e+12
Tally is -21
TIMESTEP 7052 / EPSILON 0.9866012000000062 / ACTION 0 / REWARD 0 / Q_MAX 6.560315e+11
Tally is -21
TIMESTEP 7053 / EPSILON 0.9865993000000062 / ACTION 0 / REWARD 0 / Q_MAX 1.496773e+12
Tally is -21
TIMESTEP 7054 / EPSILON 0.9865974000000062 / ACTION 2 / REWARD 0 / Q_MAX 1.661355e+12
Tally is -21
TIMESTEP 7055 / EPSILON 0.9865955000000062 / ACTION 1 / REWARD 0 / Q_MAX 4.357983e+12
Tally is -

TIMESTEP 7149 / EPSILON 0.9864169000000063 / ACTION 1 / REWARD 0 / Q_MAX 1.021192e+12
Tally is -22
TIMESTEP 7150 / EPSILON 0.9864150000000063 / ACTION 0 / REWARD 0 / Q_MAX 2.114494e+11
Tally is -22
TIMESTEP 7151 / EPSILON 0.9864131000000063 / ACTION 2 / REWARD 0 / Q_MAX 5.678508e+11
Tally is -22
TIMESTEP 7152 / EPSILON 0.9864112000000063 / ACTION 2 / REWARD 0 / Q_MAX 1.208871e+12
Tally is -22
TIMESTEP 7153 / EPSILON 0.9864093000000063 / ACTION 2 / REWARD 0 / Q_MAX 7.991823e+11
Tally is -22
TIMESTEP 7154 / EPSILON 0.9864074000000063 / ACTION 1 / REWARD 0 / Q_MAX 2.845894e+12
Tally is -22
TIMESTEP 7155 / EPSILON 0.9864055000000063 / ACTION 2 / REWARD 0 / Q_MAX 4.734718e+11
Tally is -22
TIMESTEP 7156 / EPSILON 0.9864036000000063 / ACTION 2 / REWARD 0 / Q_MAX 1.294484e+12
Tally is -22
TIMESTEP 7157 / EPSILON 0.9864017000000063 / ACTION 0 / REWARD 0 / Q_MAX 9.619371e+11
Tally is -22
TIMESTEP 7158 / EPSILON 0.9863998000000063 / ACTION 2 / REWARD 0 / Q_MAX 1.674919e+12
Tally is -22
TIMESTEP 7

TIMESTEP 7266 / EPSILON 0.9861946000000064 / ACTION 2 / REWARD 0 / Q_MAX 3.102053e+12
Tally is -22
TIMESTEP 7267 / EPSILON 0.9861927000000064 / ACTION 0 / REWARD 0 / Q_MAX 2.604625e+12
Tally is -22
TIMESTEP 7268 / EPSILON 0.9861908000000064 / ACTION 1 / REWARD 0 / Q_MAX 1.311882e+12
Tally is -22
TIMESTEP 7269 / EPSILON 0.9861889000000064 / ACTION 1 / REWARD 0 / Q_MAX 1.560617e+12
Tally is -22
TIMESTEP 7270 / EPSILON 0.9861870000000064 / ACTION 0 / REWARD 0 / Q_MAX 9.851912e+11
Tally is -22
TIMESTEP 7271 / EPSILON 0.9861851000000064 / ACTION 1 / REWARD 0 / Q_MAX 2.441243e+12
Tally is -22
TIMESTEP 7272 / EPSILON 0.9861832000000064 / ACTION 0 / REWARD 0 / Q_MAX 1.395965e+12
Tally is -22
TIMESTEP 7273 / EPSILON 0.9861813000000064 / ACTION 0 / REWARD 0 / Q_MAX 2.186136e+12
Tally is -22
TIMESTEP 7274 / EPSILON 0.9861794000000064 / ACTION 2 / REWARD 0 / Q_MAX 3.123147e+12
Tally is -22
TIMESTEP 7275 / EPSILON 0.9861775000000064 / ACTION 1 / REWARD 0 / Q_MAX 3.495708e+12
Tally is -22
TIMESTEP 7

TIMESTEP 7383 / EPSILON 0.9859723000000065 / ACTION 0 / REWARD 0 / Q_MAX -1.202904e+11
Tally is -23
TIMESTEP 7384 / EPSILON 0.9859704000000065 / ACTION 2 / REWARD 0 / Q_MAX -7.938294e+11
Tally is -23
TIMESTEP 7385 / EPSILON 0.9859685000000065 / ACTION 2 / REWARD 0 / Q_MAX 2.234313e+11
Tally is -23
TIMESTEP 7386 / EPSILON 0.9859666000000065 / ACTION 2 / REWARD 0 / Q_MAX 1.540253e+12
Tally is -23
TIMESTEP 7387 / EPSILON 0.9859647000000065 / ACTION 0 / REWARD 0 / Q_MAX -9.760394e+10
Tally is -23
TIMESTEP 7388 / EPSILON 0.9859628000000065 / ACTION 1 / REWARD 0 / Q_MAX -1.301579e+12
Tally is -23
TIMESTEP 7389 / EPSILON 0.9859609000000065 / ACTION 1 / REWARD 0 / Q_MAX -1.385147e+12
Tally is -23
TIMESTEP 7390 / EPSILON 0.9859590000000065 / ACTION 0 / REWARD 0 / Q_MAX -8.670643e+11
Tally is -23
TIMESTEP 7391 / EPSILON 0.9859571000000065 / ACTION 2 / REWARD 0 / Q_MAX 9.532323e+11
Tally is -23
TIMESTEP 7392 / EPSILON 0.9859552000000065 / ACTION 0 / REWARD 0 / Q_MAX 1.289359e+12
Tally is -23
TIME

Tally is -23
TIMESTEP 7497 / EPSILON 0.9857557000000066 / ACTION 0 / REWARD 0 / Q_MAX 2.256294e+12
Tally is -23
TIMESTEP 7498 / EPSILON 0.9857538000000066 / ACTION 0 / REWARD 0 / Q_MAX 1.531999e+12
Tally is -23
TIMESTEP 7499 / EPSILON 0.9857519000000066 / ACTION 0 / REWARD 0 / Q_MAX 1.757991e+12
Tally is -23
TIMESTEP 7500 / EPSILON 0.9857500000000066 / ACTION 0 / REWARD 0 / Q_MAX 2.488818e+12
Tally is -23
TIMESTEP 7501 / EPSILON 0.9857481000000066 / ACTION 0 / REWARD 0 / Q_MAX 2.518202e+12
Tally is -23
TIMESTEP 7502 / EPSILON 0.9857462000000066 / ACTION 1 / REWARD 0 / Q_MAX 7.947763e+11
Tally is -23
TIMESTEP 7503 / EPSILON 0.9857443000000066 / ACTION 2 / REWARD 0 / Q_MAX -3.461641e+10
Tally is -23
TIMESTEP 7504 / EPSILON 0.9857424000000066 / ACTION 2 / REWARD 0 / Q_MAX -3.547117e+11
Tally is -23
TIMESTEP 7505 / EPSILON 0.9857405000000066 / ACTION 2 / REWARD 0 / Q_MAX -1.026471e+12
Tally is -23
TIMESTEP 7506 / EPSILON 0.9857386000000066 / ACTION 0 / REWARD 0 / Q_MAX 2.230256e+11
Tally i

TIMESTEP 7618 / EPSILON 0.9855258000000067 / ACTION 0 / REWARD 0 / Q_MAX 1.517091e+12
Tally is -24
TIMESTEP 7619 / EPSILON 0.9855239000000067 / ACTION 2 / REWARD 0 / Q_MAX 1.524464e+12
Tally is -24
TIMESTEP 7620 / EPSILON 0.9855220000000067 / ACTION 1 / REWARD 0 / Q_MAX 2.970540e+12
Tally is -24
TIMESTEP 7621 / EPSILON 0.9855201000000067 / ACTION 1 / REWARD 0 / Q_MAX 1.646207e+12
Tally is -24
TIMESTEP 7622 / EPSILON 0.9855182000000067 / ACTION 0 / REWARD 0 / Q_MAX 8.104441e+11
Tally is -24
TIMESTEP 7623 / EPSILON 0.9855163000000067 / ACTION 2 / REWARD 0 / Q_MAX 1.154285e+12
Tally is -24
TIMESTEP 7624 / EPSILON 0.9855144000000067 / ACTION 1 / REWARD 0 / Q_MAX -5.205057e+10
Tally is -24
TIMESTEP 7625 / EPSILON 0.9855125000000067 / ACTION 0 / REWARD 0 / Q_MAX -1.362945e+12
Tally is -24
TIMESTEP 7626 / EPSILON 0.9855106000000067 / ACTION 1 / REWARD 0 / Q_MAX 4.080334e+10
Tally is -24
TIMESTEP 7627 / EPSILON 0.9855087000000067 / ACTION 0 / REWARD 0 / Q_MAX -1.141214e+11
Tally is -24
TIMESTE

Tally is -24
TIMESTEP 7730 / EPSILON 0.9853130000000068 / ACTION 0 / REWARD 0 / Q_MAX 1.821156e+12
Tally is -24
TIMESTEP 7731 / EPSILON 0.9853111000000068 / ACTION 0 / REWARD 0 / Q_MAX 2.264067e+12
Tally is -24
TIMESTEP 7732 / EPSILON 0.9853092000000068 / ACTION 2 / REWARD 0 / Q_MAX 2.511925e+12
Tally is -24
TIMESTEP 7733 / EPSILON 0.9853073000000068 / ACTION 1 / REWARD 0 / Q_MAX 1.907834e+12
Tally is -24
TIMESTEP 7734 / EPSILON 0.9853054000000068 / ACTION 0 / REWARD 0 / Q_MAX 2.169423e+12
Tally is -24
TIMESTEP 7735 / EPSILON 0.9853035000000068 / ACTION 0 / REWARD 0 / Q_MAX 2.069090e+12
Tally is -24
TIMESTEP 7736 / EPSILON 0.9853016000000068 / ACTION 1 / REWARD 0 / Q_MAX 2.490792e+12
Tally is -24
TIMESTEP 7737 / EPSILON 0.9852997000000068 / ACTION 2 / REWARD 0 / Q_MAX 1.941743e+12
Tally is -24
TIMESTEP 7738 / EPSILON 0.9852978000000068 / ACTION 1 / REWARD 0 / Q_MAX 2.089365e+12
Tally is -24
TIMESTEP 7739 / EPSILON 0.9852959000000068 / ACTION 0 / REWARD 0 / Q_MAX 2.103448e+12
Tally is -

TIMESTEP 7837 / EPSILON 0.9851097000000069 / ACTION 1 / REWARD 0 / Q_MAX 1.785770e+12
Tally is -24
TIMESTEP 7838 / EPSILON 0.9851078000000069 / ACTION 2 / REWARD 0 / Q_MAX 1.887425e+12
Tally is -24
TIMESTEP 7839 / EPSILON 0.9851059000000069 / ACTION 1 / REWARD 0 / Q_MAX 1.975879e+12
Tally is -24
TIMESTEP 7840 / EPSILON 0.9851040000000069 / ACTION 1 / REWARD 0 / Q_MAX 2.235197e+12
Tally is -24
TIMESTEP 7841 / EPSILON 0.9851021000000069 / ACTION 1 / REWARD 0 / Q_MAX 1.553290e+12
Tally is -24
TIMESTEP 7842 / EPSILON 0.9851002000000069 / ACTION 2 / REWARD 0 / Q_MAX 2.402180e+12
Tally is -24
TIMESTEP 7843 / EPSILON 0.9850983000000069 / ACTION 2 / REWARD 0 / Q_MAX 1.099005e+12
Tally is -24
TIMESTEP 7844 / EPSILON 0.9850964000000069 / ACTION 2 / REWARD 0 / Q_MAX 1.599293e+12
Tally is -24
TIMESTEP 7845 / EPSILON 0.9850945000000069 / ACTION 1 / REWARD 0 / Q_MAX 1.721958e+12
Tally is -24
TIMESTEP 7846 / EPSILON 0.9850926000000069 / ACTION 0 / REWARD 0 / Q_MAX 1.940178e+12
Tally is -24
TIMESTEP 7

Tally is -25
TIMESTEP 7946 / EPSILON 0.984902600000007 / ACTION 1 / REWARD 0 / Q_MAX 2.003980e+11
Tally is -25
TIMESTEP 7947 / EPSILON 0.984900700000007 / ACTION 2 / REWARD 0 / Q_MAX 1.306904e+12
Tally is -25
TIMESTEP 7948 / EPSILON 0.984898800000007 / ACTION 0 / REWARD 0 / Q_MAX 3.292083e+11
Tally is -25
TIMESTEP 7949 / EPSILON 0.984896900000007 / ACTION 1 / REWARD 0 / Q_MAX 1.331011e+12
Tally is -25
TIMESTEP 7950 / EPSILON 0.984895000000007 / ACTION 2 / REWARD 0 / Q_MAX 1.100119e+12
Tally is -25
TIMESTEP 7951 / EPSILON 0.984893100000007 / ACTION 2 / REWARD 0 / Q_MAX 1.833307e+12
Tally is -25
TIMESTEP 7952 / EPSILON 0.984891200000007 / ACTION 1 / REWARD 0 / Q_MAX 1.547912e+12
Tally is -25
TIMESTEP 7953 / EPSILON 0.984889300000007 / ACTION 0 / REWARD 0 / Q_MAX 2.402121e+12
Tally is -25
TIMESTEP 7954 / EPSILON 0.984887400000007 / ACTION 2 / REWARD 0 / Q_MAX 3.099465e+12
Tally is -25
TIMESTEP 7955 / EPSILON 0.984885500000007 / ACTION 2 / REWARD 0 / Q_MAX 3.057455e+12
Tally is -25
TIMESTE

TIMESTEP 8059 / EPSILON 0.984687900000007 / ACTION 0 / REWARD 0 / Q_MAX 2.993251e+12
Tally is -25
TIMESTEP 8060 / EPSILON 0.9846860000000071 / ACTION 0 / REWARD 0 / Q_MAX 5.691014e+12
Tally is -25
TIMESTEP 8061 / EPSILON 0.9846841000000071 / ACTION 2 / REWARD 0 / Q_MAX 3.858209e+12
Tally is -25
TIMESTEP 8062 / EPSILON 0.9846822000000071 / ACTION 0 / REWARD 0 / Q_MAX 4.174035e+12
Tally is -25
TIMESTEP 8063 / EPSILON 0.9846803000000071 / ACTION 1 / REWARD 0 / Q_MAX 3.953026e+12
Tally is -25
TIMESTEP 8064 / EPSILON 0.9846784000000071 / ACTION 2 / REWARD 0 / Q_MAX 1.849648e+12
Tally is -25
TIMESTEP 8065 / EPSILON 0.9846765000000071 / ACTION 2 / REWARD 0 / Q_MAX 1.467916e+12
Tally is -25
TIMESTEP 8066 / EPSILON 0.9846746000000071 / ACTION 0 / REWARD 0 / Q_MAX 2.021716e+12
Tally is -25
TIMESTEP 8067 / EPSILON 0.9846727000000071 / ACTION 0 / REWARD 0 / Q_MAX 2.458596e+12
Tally is -25
TIMESTEP 8068 / EPSILON 0.9846708000000071 / ACTION 1 / REWARD 0 / Q_MAX 3.424253e+12
Tally is -25
TIMESTEP 80

Tally is -26
TIMESTEP 8176 / EPSILON 0.9844656000000072 / ACTION 1 / REWARD 0 / Q_MAX 6.022199e+11
Tally is -26
TIMESTEP 8177 / EPSILON 0.9844637000000072 / ACTION 0 / REWARD 0 / Q_MAX 1.398981e+12
Tally is -26
TIMESTEP 8178 / EPSILON 0.9844618000000072 / ACTION 2 / REWARD 0 / Q_MAX 2.319987e+11
Tally is -26
TIMESTEP 8179 / EPSILON 0.9844599000000072 / ACTION 2 / REWARD 0 / Q_MAX 1.067926e+12
Tally is -26
TIMESTEP 8180 / EPSILON 0.9844580000000072 / ACTION 2 / REWARD 0 / Q_MAX 2.762692e+12
Tally is -26
TIMESTEP 8181 / EPSILON 0.9844561000000072 / ACTION 1 / REWARD 0 / Q_MAX 2.950321e+12
Tally is -26
TIMESTEP 8182 / EPSILON 0.9844542000000072 / ACTION 1 / REWARD 0 / Q_MAX 3.674806e+12
Tally is -26
TIMESTEP 8183 / EPSILON 0.9844523000000072 / ACTION 2 / REWARD 0 / Q_MAX 2.383787e+12
Tally is -26
TIMESTEP 8184 / EPSILON 0.9844504000000072 / ACTION 0 / REWARD 0 / Q_MAX 1.952522e+12
Tally is -26
TIMESTEP 8185 / EPSILON 0.9844485000000072 / ACTION 1 / REWARD 0 / Q_MAX 2.480376e+12
Tally is -

TIMESTEP 8286 / EPSILON 0.9842566000000073 / ACTION 1 / REWARD 0 / Q_MAX 2.193788e+12
Tally is -26
TIMESTEP 8287 / EPSILON 0.9842547000000073 / ACTION 0 / REWARD 0 / Q_MAX 1.797046e+12
Tally is -26
TIMESTEP 8288 / EPSILON 0.9842528000000073 / ACTION 1 / REWARD 0 / Q_MAX 1.386194e+12
Tally is -26
TIMESTEP 8289 / EPSILON 0.9842509000000073 / ACTION 0 / REWARD 0 / Q_MAX 2.542866e+12
Tally is -26
TIMESTEP 8290 / EPSILON 0.9842490000000073 / ACTION 0 / REWARD 0 / Q_MAX 2.498564e+12
Tally is -26
TIMESTEP 8291 / EPSILON 0.9842471000000073 / ACTION 1 / REWARD 0 / Q_MAX 2.418373e+12
Tally is -26
TIMESTEP 8292 / EPSILON 0.9842452000000073 / ACTION 1 / REWARD 0 / Q_MAX 3.487570e+12
Tally is -26
TIMESTEP 8293 / EPSILON 0.9842433000000073 / ACTION 1 / REWARD 0 / Q_MAX 1.687215e+12
Tally is -26
TIMESTEP 8294 / EPSILON 0.9842414000000073 / ACTION 2 / REWARD 0 / Q_MAX 2.045786e+12
Tally is -26
TIMESTEP 8295 / EPSILON 0.9842395000000073 / ACTION 0 / REWARD 0 / Q_MAX 6.906667e+11
Tally is -26
TIMESTEP 8

Tally is -27
TIMESTEP 8402 / EPSILON 0.9840362000000074 / ACTION 2 / REWARD 0 / Q_MAX 1.580719e+12
Tally is -27
TIMESTEP 8403 / EPSILON 0.9840343000000074 / ACTION 2 / REWARD 0 / Q_MAX 3.538495e+12
Tally is -27
TIMESTEP 8404 / EPSILON 0.9840324000000074 / ACTION 0 / REWARD 0 / Q_MAX 3.191291e+12
Tally is -27
TIMESTEP 8405 / EPSILON 0.9840305000000074 / ACTION 0 / REWARD 0 / Q_MAX 2.021210e+12
Tally is -27
TIMESTEP 8406 / EPSILON 0.9840286000000074 / ACTION 0 / REWARD 0 / Q_MAX 1.581714e+12
Tally is -27
TIMESTEP 8407 / EPSILON 0.9840267000000074 / ACTION 1 / REWARD 0 / Q_MAX 1.530554e+12
Tally is -27
TIMESTEP 8408 / EPSILON 0.9840248000000074 / ACTION 0 / REWARD 0 / Q_MAX 7.686802e+11
Tally is -27
TIMESTEP 8409 / EPSILON 0.9840229000000074 / ACTION 2 / REWARD 0 / Q_MAX 7.390323e+10
Tally is -27
TIMESTEP 8410 / EPSILON 0.9840210000000074 / ACTION 1 / REWARD 0 / Q_MAX -1.546342e+12
Tally is -27
TIMESTEP 8411 / EPSILON 0.9840191000000074 / ACTION 0 / REWARD 0 / Q_MAX 1.250485e+12
Tally is 

Tally is -27
TIMESTEP 8519 / EPSILON 0.9838139000000075 / ACTION 2 / REWARD 0 / Q_MAX 1.771384e+12
Tally is -27
TIMESTEP 8520 / EPSILON 0.9838120000000075 / ACTION 0 / REWARD 0 / Q_MAX 1.762234e+12
Tally is -27
TIMESTEP 8521 / EPSILON 0.9838101000000075 / ACTION 2 / REWARD 0 / Q_MAX 1.727914e+12
Tally is -27
TIMESTEP 8522 / EPSILON 0.9838082000000075 / ACTION 0 / REWARD 0 / Q_MAX 1.166630e+12
Tally is -27
TIMESTEP 8523 / EPSILON 0.9838063000000075 / ACTION 2 / REWARD 0 / Q_MAX 1.724821e+12
Tally is -27
TIMESTEP 8524 / EPSILON 0.9838044000000075 / ACTION 0 / REWARD 0 / Q_MAX 1.213619e+12
Tally is -27
TIMESTEP 8525 / EPSILON 0.9838025000000075 / ACTION 0 / REWARD 0 / Q_MAX 1.749943e+12
Tally is -27
TIMESTEP 8526 / EPSILON 0.9838006000000075 / ACTION 0 / REWARD 0 / Q_MAX 8.836796e+11
Tally is -27
TIMESTEP 8527 / EPSILON 0.9837987000000075 / ACTION 0 / REWARD 0 / Q_MAX 9.417901e+11
Tally is -27
TIMESTEP 8528 / EPSILON 0.9837968000000075 / ACTION 1 / REWARD 0 / Q_MAX 1.485346e+12
Tally is -

Tally is -27
TIMESTEP 8632 / EPSILON 0.9835992000000076 / ACTION 1 / REWARD 0 / Q_MAX -1.236980e+11
Tally is -27
TIMESTEP 8633 / EPSILON 0.9835973000000076 / ACTION 0 / REWARD 0 / Q_MAX -1.120971e+12
Tally is -27
TIMESTEP 8634 / EPSILON 0.9835954000000076 / ACTION 2 / REWARD 0 / Q_MAX -2.149609e+12
Tally is -27
TIMESTEP 8635 / EPSILON 0.9835935000000076 / ACTION 0 / REWARD 0 / Q_MAX -2.028076e+12
Tally is -27
TIMESTEP 8636 / EPSILON 0.9835916000000076 / ACTION 0 / REWARD 0 / Q_MAX -5.897540e+11
Tally is -27
TIMESTEP 8637 / EPSILON 0.9835897000000076 / ACTION 0 / REWARD 0 / Q_MAX 5.086344e+11
Tally is -27
TIMESTEP 8638 / EPSILON 0.9835878000000076 / ACTION 1 / REWARD 0 / Q_MAX 3.256422e+11
Tally is -27
TIMESTEP 8639 / EPSILON 0.9835859000000076 / ACTION 1 / REWARD 0 / Q_MAX 5.552154e+11
Tally is -27
TIMESTEP 8640 / EPSILON 0.9835840000000076 / ACTION 2 / REWARD 0 / Q_MAX 1.036540e+12
Tally is -27
TIMESTEP 8641 / EPSILON 0.9835821000000076 / ACTION 0 / REWARD 0 / Q_MAX 1.073492e+12
Tally

TIMESTEP 8744 / EPSILON 0.9833864000000077 / ACTION 0 / REWARD 0 / Q_MAX 1.274599e+12
Tally is -27
TIMESTEP 8745 / EPSILON 0.9833845000000077 / ACTION 2 / REWARD 0 / Q_MAX 1.035174e+12
Tally is -27
TIMESTEP 8746 / EPSILON 0.9833826000000077 / ACTION 0 / REWARD 0 / Q_MAX 2.225789e+12
Tally is -27
TIMESTEP 8747 / EPSILON 0.9833807000000077 / ACTION 2 / REWARD 0 / Q_MAX 6.826504e+11
Tally is -27
TIMESTEP 8748 / EPSILON 0.9833788000000077 / ACTION 1 / REWARD 0 / Q_MAX 3.969143e+11
Tally is -27
TIMESTEP 8749 / EPSILON 0.9833769000000077 / ACTION 1 / REWARD 0 / Q_MAX 8.829850e+11
Tally is -27
TIMESTEP 8750 / EPSILON 0.9833750000000077 / ACTION 0 / REWARD 0 / Q_MAX 1.085948e+12
Tally is -27
TIMESTEP 8751 / EPSILON 0.9833731000000077 / ACTION 2 / REWARD 0 / Q_MAX 1.215252e+12
Tally is -27
TIMESTEP 8752 / EPSILON 0.9833712000000077 / ACTION 1 / REWARD 0 / Q_MAX 1.156345e+12
Tally is -27
TIMESTEP 8753 / EPSILON 0.9833693000000077 / ACTION 2 / REWARD 0 / Q_MAX 1.313119e+12
Tally is -27
TIMESTEP 8

Tally is -28
TIMESTEP 8861 / EPSILON 0.9831641000000078 / ACTION 1 / REWARD 0 / Q_MAX 1.243463e+12
Tally is -28
TIMESTEP 8862 / EPSILON 0.9831622000000078 / ACTION 2 / REWARD 0 / Q_MAX 1.500144e+12
Tally is -28
TIMESTEP 8863 / EPSILON 0.9831603000000078 / ACTION 2 / REWARD 0 / Q_MAX 1.159802e+12
Tally is -28
TIMESTEP 8864 / EPSILON 0.9831584000000078 / ACTION 2 / REWARD 0 / Q_MAX 1.586394e+12
Tally is -28
TIMESTEP 8865 / EPSILON 0.9831565000000078 / ACTION 0 / REWARD 0 / Q_MAX 8.426835e+11
Tally is -28
TIMESTEP 8866 / EPSILON 0.9831546000000078 / ACTION 1 / REWARD 0 / Q_MAX 9.383393e+11
Tally is -28
TIMESTEP 8867 / EPSILON 0.9831527000000078 / ACTION 0 / REWARD 0 / Q_MAX 5.419645e+11
Tally is -28
TIMESTEP 8868 / EPSILON 0.9831508000000078 / ACTION 2 / REWARD 0 / Q_MAX 5.264373e+10
Tally is -28
TIMESTEP 8869 / EPSILON 0.9831489000000078 / ACTION 2 / REWARD 0 / Q_MAX -6.263284e+11
Tally is -28
TIMESTEP 8870 / EPSILON 0.9831470000000078 / ACTION 2 / REWARD 0 / Q_MAX 7.180545e+10
Tally is 

TIMESTEP 8977 / EPSILON 0.9829437000000079 / ACTION 0 / REWARD 0 / Q_MAX 2.019538e+12
Tally is -28
TIMESTEP 8978 / EPSILON 0.9829418000000079 / ACTION 1 / REWARD 0 / Q_MAX 3.093679e+12
Tally is -28
TIMESTEP 8979 / EPSILON 0.9829399000000079 / ACTION 2 / REWARD 0 / Q_MAX 2.568121e+12
Tally is -28
TIMESTEP 8980 / EPSILON 0.9829380000000079 / ACTION 2 / REWARD 0 / Q_MAX 1.074130e+12
Tally is -28
TIMESTEP 8981 / EPSILON 0.9829361000000079 / ACTION 0 / REWARD 0 / Q_MAX 1.439970e+12
Tally is -28
TIMESTEP 8982 / EPSILON 0.9829342000000079 / ACTION 0 / REWARD 0 / Q_MAX 1.268646e+12
Tally is -28
TIMESTEP 8983 / EPSILON 0.9829323000000079 / ACTION 0 / REWARD 0 / Q_MAX 1.405751e+12
Tally is -28
TIMESTEP 8984 / EPSILON 0.9829304000000079 / ACTION 2 / REWARD 0 / Q_MAX 1.557010e+12
Tally is -28
TIMESTEP 8985 / EPSILON 0.9829285000000079 / ACTION 0 / REWARD 0 / Q_MAX 1.305510e+12
Tally is -28
TIMESTEP 8986 / EPSILON 0.9829266000000079 / ACTION 0 / REWARD 0 / Q_MAX 1.265409e+12
Tally is -28
TIMESTEP 8

Tally is -28
TIMESTEP 9095 / EPSILON 0.982719500000008 / ACTION 1 / REWARD 0 / Q_MAX -3.732688e+10
Tally is -28
TIMESTEP 9096 / EPSILON 0.982717600000008 / ACTION 2 / REWARD 0 / Q_MAX 4.450509e+10
Tally is -28
TIMESTEP 9097 / EPSILON 0.982715700000008 / ACTION 0 / REWARD 0 / Q_MAX 4.335823e+11
Tally is -28
TIMESTEP 9098 / EPSILON 0.982713800000008 / ACTION 0 / REWARD 0 / Q_MAX 6.351464e+11
Tally is -28
TIMESTEP 9099 / EPSILON 0.982711900000008 / ACTION 0 / REWARD 0 / Q_MAX 2.172983e+11
Tally is -28
TIMESTEP 9100 / EPSILON 0.982710000000008 / ACTION 2 / REWARD 0 / Q_MAX 1.163460e+11
Tally is -28
TIMESTEP 9101 / EPSILON 0.982708100000008 / ACTION 0 / REWARD 0 / Q_MAX 1.171751e+11
Tally is -28
TIMESTEP 9102 / EPSILON 0.982706200000008 / ACTION 0 / REWARD 0 / Q_MAX 1.586176e+11
Tally is -28
TIMESTEP 9103 / EPSILON 0.982704300000008 / ACTION 0 / REWARD 0 / Q_MAX 5.874005e+10
Tally is -28
TIMESTEP 9104 / EPSILON 0.982702400000008 / ACTION 1 / REWARD 0 / Q_MAX 1.465460e+11
Tally is -28
TIMEST

TIMESTEP 9209 / EPSILON 0.9825029000000081 / ACTION 1 / REWARD 0 / Q_MAX 1.503257e+12
Tally is -29
TIMESTEP 9210 / EPSILON 0.9825010000000081 / ACTION 1 / REWARD 0 / Q_MAX -2.579958e+11
Tally is -29
TIMESTEP 9211 / EPSILON 0.9824991000000081 / ACTION 1 / REWARD 0 / Q_MAX -6.302700e+11
Tally is -29
TIMESTEP 9212 / EPSILON 0.9824972000000081 / ACTION 2 / REWARD 0 / Q_MAX 1.005250e+12
Tally is -29
TIMESTEP 9213 / EPSILON 0.9824953000000081 / ACTION 0 / REWARD 0 / Q_MAX 1.471477e+12
Tally is -29
TIMESTEP 9214 / EPSILON 0.9824934000000081 / ACTION 1 / REWARD 0 / Q_MAX 1.092564e+12
Tally is -29
TIMESTEP 9215 / EPSILON 0.9824915000000081 / ACTION 1 / REWARD 0 / Q_MAX 2.046508e+12
Tally is -29
TIMESTEP 9216 / EPSILON 0.9824896000000081 / ACTION 2 / REWARD 0 / Q_MAX 1.570712e+12
Tally is -29
TIMESTEP 9217 / EPSILON 0.9824877000000081 / ACTION 2 / REWARD 0 / Q_MAX 2.324343e+12
Tally is -29
TIMESTEP 9218 / EPSILON 0.9824858000000081 / ACTION 2 / REWARD 0 / Q_MAX 1.243826e+12
Tally is -29
TIMESTEP

TIMESTEP 9321 / EPSILON 0.9822901000000082 / ACTION 2 / REWARD 0 / Q_MAX 1.119164e+12
Tally is -29
TIMESTEP 9322 / EPSILON 0.9822882000000082 / ACTION 1 / REWARD 0 / Q_MAX 1.069473e+12
Tally is -29
TIMESTEP 9323 / EPSILON 0.9822863000000082 / ACTION 0 / REWARD 0 / Q_MAX 8.943396e+11
Tally is -29
TIMESTEP 9324 / EPSILON 0.9822844000000082 / ACTION 1 / REWARD 0 / Q_MAX 1.417240e+12
Tally is -29
TIMESTEP 9325 / EPSILON 0.9822825000000082 / ACTION 2 / REWARD 0 / Q_MAX 4.805890e+11
Tally is -29
TIMESTEP 9326 / EPSILON 0.9822806000000082 / ACTION 0 / REWARD 0 / Q_MAX 2.413021e+11
Tally is -29
TIMESTEP 9327 / EPSILON 0.9822787000000082 / ACTION 0 / REWARD 0 / Q_MAX 1.344567e+12
Tally is -29
TIMESTEP 9328 / EPSILON 0.9822768000000082 / ACTION 2 / REWARD 0 / Q_MAX 9.179707e+11
Tally is -29
TIMESTEP 9329 / EPSILON 0.9822749000000082 / ACTION 2 / REWARD 0 / Q_MAX 1.673376e+11
Tally is -29
TIMESTEP 9330 / EPSILON 0.9822730000000082 / ACTION 0 / REWARD 0 / Q_MAX 1.684397e+11
Tally is -29
TIMESTEP 9

Tally is -29
TIMESTEP 9438 / EPSILON 0.9820678000000083 / ACTION 0 / REWARD 0 / Q_MAX 1.116604e+12
Tally is -29
TIMESTEP 9439 / EPSILON 0.9820659000000083 / ACTION 2 / REWARD 0 / Q_MAX 3.579571e+12
Tally is -29
TIMESTEP 9440 / EPSILON 0.9820640000000083 / ACTION 2 / REWARD 0 / Q_MAX 2.430503e+11
Tally is -29
TIMESTEP 9441 / EPSILON 0.9820621000000083 / ACTION 0 / REWARD 0 / Q_MAX 7.897262e+10
Tally is -29
TIMESTEP 9442 / EPSILON 0.9820602000000083 / ACTION 0 / REWARD 0 / Q_MAX 2.689197e+12
Tally is -29
TIMESTEP 9443 / EPSILON 0.9820583000000083 / ACTION 2 / REWARD 0 / Q_MAX 1.781765e+12
Tally is -29
TIMESTEP 9444 / EPSILON 0.9820564000000083 / ACTION 2 / REWARD 0 / Q_MAX 3.548441e+12
Tally is -29
TIMESTEP 9445 / EPSILON 0.9820545000000083 / ACTION 1 / REWARD 0 / Q_MAX 1.073560e+12
Tally is -29
TIMESTEP 9446 / EPSILON 0.9820526000000083 / ACTION 1 / REWARD 0 / Q_MAX 1.436703e+12
Tally is -29
TIMESTEP 9447 / EPSILON 0.9820507000000083 / ACTION 2 / REWARD 0 / Q_MAX 3.068232e+12
Tally is -

Tally is -29
TIMESTEP 9553 / EPSILON 0.9818493000000084 / ACTION 0 / REWARD 0 / Q_MAX 4.346831e+12
Tally is -29
TIMESTEP 9554 / EPSILON 0.9818474000000084 / ACTION 1 / REWARD 0 / Q_MAX 3.419832e+12
Tally is -29
TIMESTEP 9555 / EPSILON 0.9818455000000084 / ACTION 2 / REWARD 0 / Q_MAX 5.880224e+12
Tally is -29
TIMESTEP 9556 / EPSILON 0.9818436000000084 / ACTION 2 / REWARD 0 / Q_MAX 4.631827e+12
Tally is -29
TIMESTEP 9557 / EPSILON 0.9818417000000084 / ACTION 0 / REWARD 0 / Q_MAX 4.743923e+12
Tally is -29
TIMESTEP 9558 / EPSILON 0.9818398000000084 / ACTION 1 / REWARD 0 / Q_MAX 3.160389e+12
Tally is -29
TIMESTEP 9559 / EPSILON 0.9818379000000084 / ACTION 1 / REWARD 0 / Q_MAX 1.354991e+12
Tally is -29
TIMESTEP 9560 / EPSILON 0.9818360000000084 / ACTION 0 / REWARD 0 / Q_MAX 1.680345e+12
Tally is -29
TIMESTEP 9561 / EPSILON 0.9818341000000084 / ACTION 0 / REWARD 0 / Q_MAX 4.310139e+12
Tally is -29
TIMESTEP 9562 / EPSILON 0.9818322000000084 / ACTION 2 / REWARD 0 / Q_MAX 3.222080e+12
Tally is -

TIMESTEP 9670 / EPSILON 0.9816270000000085 / ACTION 0 / REWARD 0 / Q_MAX 4.291217e+12
Tally is -29
TIMESTEP 9671 / EPSILON 0.9816251000000085 / ACTION 2 / REWARD 0 / Q_MAX 3.173074e+12
Tally is -29
TIMESTEP 9672 / EPSILON 0.9816232000000085 / ACTION 2 / REWARD 0 / Q_MAX 3.227848e+12
Tally is -29
TIMESTEP 9673 / EPSILON 0.9816213000000085 / ACTION 0 / REWARD 0 / Q_MAX 1.780590e+12
Tally is -29
TIMESTEP 9674 / EPSILON 0.9816194000000085 / ACTION 0 / REWARD 0 / Q_MAX 2.349040e+12
Tally is -29
TIMESTEP 9675 / EPSILON 0.9816175000000085 / ACTION 2 / REWARD 0 / Q_MAX 2.184625e+12
Tally is -29
TIMESTEP 9676 / EPSILON 0.9816156000000085 / ACTION 0 / REWARD 0 / Q_MAX 3.093555e+12
Tally is -29
TIMESTEP 9677 / EPSILON 0.9816137000000085 / ACTION 1 / REWARD 0 / Q_MAX 2.667979e+12
Tally is -29
TIMESTEP 9678 / EPSILON 0.9816118000000085 / ACTION 2 / REWARD 0 / Q_MAX 2.508057e+12
Tally is -29
TIMESTEP 9679 / EPSILON 0.9816099000000085 / ACTION 2 / REWARD 0 / Q_MAX 4.096769e+12
Tally is -29
TIMESTEP 9

TIMESTEP 9788 / EPSILON 0.9814028000000086 / ACTION 2 / REWARD 0 / Q_MAX 4.383351e+12
Tally is -29
TIMESTEP 9789 / EPSILON 0.9814009000000086 / ACTION 2 / REWARD 0 / Q_MAX 4.537636e+12
Tally is -29
TIMESTEP 9790 / EPSILON 0.9813990000000086 / ACTION 0 / REWARD 0 / Q_MAX 3.036215e+12
Tally is -29
TIMESTEP 9791 / EPSILON 0.9813971000000086 / ACTION 2 / REWARD 0 / Q_MAX 8.725755e+11
Tally is -29
TIMESTEP 9792 / EPSILON 0.9813952000000086 / ACTION 1 / REWARD 0 / Q_MAX 2.204489e+12
Tally is -29
TIMESTEP 9793 / EPSILON 0.9813933000000086 / ACTION 1 / REWARD 0 / Q_MAX 1.792095e+12
Tally is -29
TIMESTEP 9794 / EPSILON 0.9813914000000086 / ACTION 2 / REWARD 0 / Q_MAX 4.891926e+12
Tally is -29
TIMESTEP 9795 / EPSILON 0.9813895000000086 / ACTION 0 / REWARD 0 / Q_MAX 4.303165e+12
Tally is -29
TIMESTEP 9796 / EPSILON 0.9813876000000086 / ACTION 0 / REWARD 0 / Q_MAX 4.106443e+12
Tally is -29
TIMESTEP 9797 / EPSILON 0.9813857000000086 / ACTION 2 / REWARD 0 / Q_MAX 6.753454e+12
Tally is -29
TIMESTEP 9

TIMESTEP 9897 / EPSILON 0.9811957000000087 / ACTION 0 / REWARD 0 / Q_MAX 3.033643e+12
Tally is -30
TIMESTEP 9898 / EPSILON 0.9811938000000087 / ACTION 0 / REWARD 0 / Q_MAX 2.297586e+12
Tally is -30
TIMESTEP 9899 / EPSILON 0.9811919000000087 / ACTION 2 / REWARD 0 / Q_MAX 1.847608e+12
Tally is -30
TIMESTEP 9900 / EPSILON 0.9811900000000087 / ACTION 1 / REWARD 0 / Q_MAX 3.696310e+12
Tally is -30
TIMESTEP 9901 / EPSILON 0.9811881000000087 / ACTION 0 / REWARD 0 / Q_MAX 4.044695e+12
Tally is -30
TIMESTEP 9902 / EPSILON 0.9811862000000087 / ACTION 2 / REWARD 0 / Q_MAX 3.622021e+12
Tally is -30
TIMESTEP 9903 / EPSILON 0.9811843000000087 / ACTION 1 / REWARD 0 / Q_MAX 2.576780e+12
Tally is -30
TIMESTEP 9904 / EPSILON 0.9811824000000087 / ACTION 1 / REWARD 0 / Q_MAX 3.745511e+12
Tally is -30
TIMESTEP 9905 / EPSILON 0.9811805000000087 / ACTION 2 / REWARD 0 / Q_MAX 2.766235e+12
Tally is -30
TIMESTEP 9906 / EPSILON 0.9811786000000087 / ACTION 0 / REWARD 0 / Q_MAX 3.191919e+12
Tally is -30
TIMESTEP 9

TIMESTEP 10000 / EPSILON 0.9810000000000088 / ACTION 1 / REWARD 0 / Q_MAX 2.260757e+12
Tally is -30
TIMESTEP 10001 / EPSILON 0.9809981000000088 / ACTION 2 / REWARD 0 / Q_MAX 1.119216e+12
Tally is -30
TIMESTEP 10002 / EPSILON 0.9809962000000088 / ACTION 0 / REWARD 0 / Q_MAX 1.513079e+12
Tally is -30
TIMESTEP 10003 / EPSILON 0.9809943000000088 / ACTION 0 / REWARD 0 / Q_MAX 1.398100e+12
Tally is -30
TIMESTEP 10004 / EPSILON 0.9809924000000088 / ACTION 1 / REWARD 0 / Q_MAX 3.360330e+12
Tally is -30
TIMESTEP 10005 / EPSILON 0.9809905000000088 / ACTION 0 / REWARD 0 / Q_MAX 1.988225e+11
Tally is -30
TIMESTEP 10006 / EPSILON 0.9809886000000088 / ACTION 1 / REWARD 0 / Q_MAX 1.532625e+12
Tally is -30
TIMESTEP 10007 / EPSILON 0.9809867000000088 / ACTION 2 / REWARD 0 / Q_MAX 2.039167e+12
Tally is -30
TIMESTEP 10008 / EPSILON 0.9809848000000088 / ACTION 0 / REWARD 0 / Q_MAX 2.700938e+12
Tally is -30
TIMESTEP 10009 / EPSILON 0.9809829000000088 / ACTION 1 / REWARD 0 / Q_MAX 3.628625e+12
Tally is -30


Tally is -30
TIMESTEP 10098 / EPSILON 0.9808138000000088 / ACTION 1 / REWARD 0 / Q_MAX 1.425078e+12
Tally is -30
TIMESTEP 10099 / EPSILON 0.9808119000000088 / ACTION 2 / REWARD 0 / Q_MAX 1.799760e+12
Tally is -30
TIMESTEP 10100 / EPSILON 0.9808100000000088 / ACTION 1 / REWARD 0 / Q_MAX 1.978720e+12
Tally is -30
TIMESTEP 10101 / EPSILON 0.9808081000000088 / ACTION 2 / REWARD 0 / Q_MAX 2.093054e+12
Tally is -30
TIMESTEP 10102 / EPSILON 0.9808062000000088 / ACTION 2 / REWARD 0 / Q_MAX 1.726052e+12
Tally is -30
TIMESTEP 10103 / EPSILON 0.9808043000000088 / ACTION 0 / REWARD 0 / Q_MAX 1.837433e+12
Tally is -30
TIMESTEP 10104 / EPSILON 0.9808024000000088 / ACTION 1 / REWARD 0 / Q_MAX 2.574317e+12
Tally is -30
TIMESTEP 10105 / EPSILON 0.9808005000000088 / ACTION 1 / REWARD 0 / Q_MAX 2.964774e+12
Tally is -30
TIMESTEP 10106 / EPSILON 0.9807986000000088 / ACTION 2 / REWARD 0 / Q_MAX 2.331750e+12
Tally is -30
TIMESTEP 10107 / EPSILON 0.9807967000000088 / ACTION 1 / REWARD 0 / Q_MAX 2.402493e+12


Tally is -30
TIMESTEP 10183 / EPSILON 0.9806523000000089 / ACTION 0 / REWARD 0 / Q_MAX 2.064577e+12
Tally is -30
TIMESTEP 10184 / EPSILON 0.9806504000000089 / ACTION 2 / REWARD 0 / Q_MAX 9.128017e+11
Tally is -30
TIMESTEP 10185 / EPSILON 0.9806485000000089 / ACTION 2 / REWARD 0 / Q_MAX 2.343031e+12
Tally is -30
TIMESTEP 10186 / EPSILON 0.9806466000000089 / ACTION 0 / REWARD 0 / Q_MAX 1.711064e+12
Tally is -30
TIMESTEP 10187 / EPSILON 0.9806447000000089 / ACTION 0 / REWARD 0 / Q_MAX 1.014174e+12
Tally is -30
TIMESTEP 10188 / EPSILON 0.9806428000000089 / ACTION 2 / REWARD 0 / Q_MAX 8.637435e+11
Tally is -30
TIMESTEP 10189 / EPSILON 0.9806409000000089 / ACTION 0 / REWARD 0 / Q_MAX 2.912413e+11
Tally is -30
TIMESTEP 10190 / EPSILON 0.9806390000000089 / ACTION 0 / REWARD 0 / Q_MAX -1.226678e+12
Tally is -30
TIMESTEP 10191 / EPSILON 0.9806371000000089 / ACTION 2 / REWARD 0 / Q_MAX 1.548849e+11
Tally is -30
TIMESTEP 10192 / EPSILON 0.9806352000000089 / ACTION 0 / REWARD 0 / Q_MAX -1.063103e+1

TIMESTEP 10270 / EPSILON 0.980487000000009 / ACTION 1 / REWARD 0 / Q_MAX -2.074020e+11
Tally is -30
TIMESTEP 10271 / EPSILON 0.980485100000009 / ACTION 0 / REWARD 0 / Q_MAX 8.887889e+11
Tally is -30
TIMESTEP 10272 / EPSILON 0.980483200000009 / ACTION 1 / REWARD 0 / Q_MAX 6.152195e+11
Tally is -30
TIMESTEP 10273 / EPSILON 0.980481300000009 / ACTION 0 / REWARD 0 / Q_MAX 3.041142e+11
Tally is -30
TIMESTEP 10274 / EPSILON 0.980479400000009 / ACTION 1 / REWARD 0 / Q_MAX 2.270564e+11
Tally is -30
TIMESTEP 10275 / EPSILON 0.980477500000009 / ACTION 1 / REWARD 0 / Q_MAX 5.214801e+11
Tally is -30
TIMESTEP 10276 / EPSILON 0.980475600000009 / ACTION 2 / REWARD 0 / Q_MAX 3.221797e+11
Tally is -30
TIMESTEP 10277 / EPSILON 0.980473700000009 / ACTION 0 / REWARD 0 / Q_MAX 8.035642e+11
Tally is -30
TIMESTEP 10278 / EPSILON 0.980471800000009 / ACTION 1 / REWARD 0 / Q_MAX 1.644011e+12
Tally is -30
TIMESTEP 10279 / EPSILON 0.980469900000009 / ACTION 2 / REWARD 0 / Q_MAX 1.962574e+12
Tally is -30
TIMESTEP 

TIMESTEP 10354 / EPSILON 0.9803274000000091 / ACTION 0 / REWARD 0 / Q_MAX -1.326445e+10
Tally is -31
TIMESTEP 10355 / EPSILON 0.9803255000000091 / ACTION 0 / REWARD 0 / Q_MAX -6.406083e+11
Tally is -31
TIMESTEP 10356 / EPSILON 0.9803236000000091 / ACTION 1 / REWARD 0 / Q_MAX -1.197870e+10
Tally is -31
TIMESTEP 10357 / EPSILON 0.9803217000000091 / ACTION 0 / REWARD 0 / Q_MAX -6.504593e+11
Tally is -31
TIMESTEP 10358 / EPSILON 0.9803198000000091 / ACTION 1 / REWARD 0 / Q_MAX 6.033384e+11
Tally is -31
TIMESTEP 10359 / EPSILON 0.9803179000000091 / ACTION 1 / REWARD 0 / Q_MAX 5.760620e+11
Tally is -31
TIMESTEP 10360 / EPSILON 0.9803160000000091 / ACTION 2 / REWARD 0 / Q_MAX 1.289411e+11
Tally is -31
TIMESTEP 10361 / EPSILON 0.9803141000000091 / ACTION 0 / REWARD 0 / Q_MAX -1.643660e+11
Tally is -31
TIMESTEP 10362 / EPSILON 0.9803122000000091 / ACTION 2 / REWARD 0 / Q_MAX 1.054664e+12
Tally is -31
TIMESTEP 10363 / EPSILON 0.9803103000000091 / ACTION 2 / REWARD 0 / Q_MAX 2.042192e+12
Tally is

TIMESTEP 10446 / EPSILON 0.9801526000000091 / ACTION 2 / REWARD 0 / Q_MAX 1.087098e+12
Tally is -31
TIMESTEP 10447 / EPSILON 0.9801507000000091 / ACTION 2 / REWARD 0 / Q_MAX 1.902610e+12
Tally is -31
TIMESTEP 10448 / EPSILON 0.9801488000000091 / ACTION 0 / REWARD 0 / Q_MAX 2.083072e+12
Tally is -31
TIMESTEP 10449 / EPSILON 0.9801469000000091 / ACTION 2 / REWARD 0 / Q_MAX 1.294216e+12
Tally is -31
TIMESTEP 10450 / EPSILON 0.9801450000000091 / ACTION 1 / REWARD 0 / Q_MAX 6.839515e+11
Tally is -31
TIMESTEP 10451 / EPSILON 0.9801431000000091 / ACTION 0 / REWARD 0 / Q_MAX 6.330277e+11
Tally is -31
TIMESTEP 10452 / EPSILON 0.9801412000000091 / ACTION 2 / REWARD 0 / Q_MAX 6.673072e+11
Tally is -31
TIMESTEP 10453 / EPSILON 0.9801393000000092 / ACTION 1 / REWARD 0 / Q_MAX 1.000028e+12
Tally is -31
TIMESTEP 10454 / EPSILON 0.9801374000000092 / ACTION 2 / REWARD 0 / Q_MAX 1.293072e+12
Tally is -31
TIMESTEP 10455 / EPSILON 0.9801355000000092 / ACTION 0 / REWARD 0 / Q_MAX 7.288850e+11
Tally is -31


TIMESTEP 10531 / EPSILON 0.9799911000000092 / ACTION 1 / REWARD 0 / Q_MAX 1.563097e+12
Tally is -31
TIMESTEP 10532 / EPSILON 0.9799892000000092 / ACTION 2 / REWARD 0 / Q_MAX 2.489149e+12
Tally is -31
TIMESTEP 10533 / EPSILON 0.9799873000000092 / ACTION 0 / REWARD 0 / Q_MAX 8.349834e+11
Tally is -31
TIMESTEP 10534 / EPSILON 0.9799854000000092 / ACTION 1 / REWARD 0 / Q_MAX 1.192941e+12
Tally is -31
TIMESTEP 10535 / EPSILON 0.9799835000000092 / ACTION 0 / REWARD 0 / Q_MAX 7.697764e+11
Tally is -31
TIMESTEP 10536 / EPSILON 0.9799816000000092 / ACTION 2 / REWARD 0 / Q_MAX 6.304005e+11
Tally is -31
TIMESTEP 10537 / EPSILON 0.9799797000000092 / ACTION 0 / REWARD 0 / Q_MAX 1.490593e+12
Tally is -31
TIMESTEP 10538 / EPSILON 0.9799778000000092 / ACTION 2 / REWARD 0 / Q_MAX 2.443516e+12
Tally is -31
TIMESTEP 10539 / EPSILON 0.9799759000000092 / ACTION 0 / REWARD 0 / Q_MAX 3.614693e+12
Tally is -31
TIMESTEP 10540 / EPSILON 0.9799740000000092 / ACTION 0 / REWARD 0 / Q_MAX 1.985072e+12
Tally is -31


Tally is -32
TIMESTEP 10622 / EPSILON 0.9798182000000093 / ACTION 1 / REWARD 0 / Q_MAX -1.831104e+12
Tally is -32
TIMESTEP 10623 / EPSILON 0.9798163000000093 / ACTION 1 / REWARD 0 / Q_MAX -1.691675e+12
Tally is -32
TIMESTEP 10624 / EPSILON 0.9798144000000093 / ACTION 2 / REWARD 0 / Q_MAX -8.939219e+11
Tally is -32
TIMESTEP 10625 / EPSILON 0.9798125000000093 / ACTION 0 / REWARD 0 / Q_MAX -3.132458e+11
Tally is -32
TIMESTEP 10626 / EPSILON 0.9798106000000093 / ACTION 2 / REWARD 0 / Q_MAX 7.167799e+11
Tally is -32
TIMESTEP 10627 / EPSILON 0.9798087000000093 / ACTION 1 / REWARD 0 / Q_MAX -9.698783e+11
Tally is -32
TIMESTEP 10628 / EPSILON 0.9798068000000093 / ACTION 2 / REWARD 0 / Q_MAX 1.063512e+12
Tally is -32
TIMESTEP 10629 / EPSILON 0.9798049000000093 / ACTION 2 / REWARD 0 / Q_MAX 2.038283e+11
Tally is -32
TIMESTEP 10630 / EPSILON 0.9798030000000093 / ACTION 2 / REWARD 0 / Q_MAX 1.256000e+12
Tally is -32
TIMESTEP 10631 / EPSILON 0.9798011000000093 / ACTION 1 / REWARD 0 / Q_MAX 1.784493

TIMESTEP 10708 / EPSILON 0.9796548000000094 / ACTION 1 / REWARD 0 / Q_MAX -1.129748e+11
Tally is -32
TIMESTEP 10709 / EPSILON 0.9796529000000094 / ACTION 1 / REWARD 0 / Q_MAX 1.407003e+11
Tally is -32
TIMESTEP 10710 / EPSILON 0.9796510000000094 / ACTION 0 / REWARD 0 / Q_MAX -3.232137e+11
Tally is -32
TIMESTEP 10711 / EPSILON 0.9796491000000094 / ACTION 0 / REWARD 0 / Q_MAX 6.350271e+11
Tally is -32
TIMESTEP 10712 / EPSILON 0.9796472000000094 / ACTION 2 / REWARD 0 / Q_MAX 6.603545e+11
Tally is -32
TIMESTEP 10713 / EPSILON 0.9796453000000094 / ACTION 0 / REWARD 0 / Q_MAX 1.145989e+12
Tally is -32
TIMESTEP 10714 / EPSILON 0.9796434000000094 / ACTION 2 / REWARD 0 / Q_MAX 1.152601e+12
Tally is -32
TIMESTEP 10715 / EPSILON 0.9796415000000094 / ACTION 0 / REWARD 0 / Q_MAX 9.642676e+11
Tally is -32
TIMESTEP 10716 / EPSILON 0.9796396000000094 / ACTION 0 / REWARD 0 / Q_MAX 9.750885e+11
Tally is -32
TIMESTEP 10717 / EPSILON 0.9796377000000094 / ACTION 0 / REWARD 0 / Q_MAX 1.020862e+12
Tally is -3

TIMESTEP 10805 / EPSILON 0.9794705000000095 / ACTION 0 / REWARD 0 / Q_MAX 1.768331e+12
Tally is -32
TIMESTEP 10806 / EPSILON 0.9794686000000095 / ACTION 0 / REWARD 0 / Q_MAX 2.999532e+12
Tally is -32
TIMESTEP 10807 / EPSILON 0.9794667000000095 / ACTION 1 / REWARD 0 / Q_MAX 2.890127e+12
Tally is -32
TIMESTEP 10808 / EPSILON 0.9794648000000095 / ACTION 1 / REWARD 0 / Q_MAX 2.981084e+12
Tally is -32
TIMESTEP 10809 / EPSILON 0.9794629000000095 / ACTION 2 / REWARD 0 / Q_MAX 1.336352e+12
Tally is -32
TIMESTEP 10810 / EPSILON 0.9794610000000095 / ACTION 1 / REWARD 0 / Q_MAX 9.828398e+11
Tally is -32
TIMESTEP 10811 / EPSILON 0.9794591000000095 / ACTION 0 / REWARD 0 / Q_MAX 2.573675e+12
Tally is -32
TIMESTEP 10812 / EPSILON 0.9794572000000095 / ACTION 1 / REWARD 0 / Q_MAX 2.091128e+12
Tally is -32
TIMESTEP 10813 / EPSILON 0.9794553000000095 / ACTION 0 / REWARD 0 / Q_MAX 2.366474e+12
Tally is -32
TIMESTEP 10814 / EPSILON 0.9794534000000095 / ACTION 2 / REWARD 0 / Q_MAX 1.126955e+12
Tally is -32


Tally is -32
TIMESTEP 10905 / EPSILON 0.9792805000000095 / ACTION 0 / REWARD 0 / Q_MAX 1.498266e+12
Tally is -32
TIMESTEP 10906 / EPSILON 0.9792786000000095 / ACTION 2 / REWARD 0 / Q_MAX 1.204870e+12
Tally is -32
TIMESTEP 10907 / EPSILON 0.9792767000000095 / ACTION 1 / REWARD 0 / Q_MAX 2.186914e+12
Tally is -32
TIMESTEP 10908 / EPSILON 0.9792748000000095 / ACTION 2 / REWARD 0 / Q_MAX 2.534154e+12
Tally is -32
TIMESTEP 10909 / EPSILON 0.9792729000000095 / ACTION 1 / REWARD 0 / Q_MAX 2.364450e+12
Tally is -32
TIMESTEP 10910 / EPSILON 0.9792710000000096 / ACTION 2 / REWARD 0 / Q_MAX -1.733781e+11
Tally is -32
TIMESTEP 10911 / EPSILON 0.9792691000000096 / ACTION 1 / REWARD 0 / Q_MAX 1.116022e+12
Tally is -32
TIMESTEP 10912 / EPSILON 0.9792672000000096 / ACTION 0 / REWARD 0 / Q_MAX 1.464660e+12
Tally is -32
TIMESTEP 10913 / EPSILON 0.9792653000000096 / ACTION 2 / REWARD 0 / Q_MAX 1.789679e+12
Tally is -32
TIMESTEP 10914 / EPSILON 0.9792634000000096 / ACTION 2 / REWARD 0 / Q_MAX 1.509892e+12

Tally is -32
TIMESTEP 11022 / EPSILON 0.9790582000000096 / ACTION 1 / REWARD 0 / Q_MAX 2.642161e+12
Tally is -32
TIMESTEP 11023 / EPSILON 0.9790563000000096 / ACTION 2 / REWARD 0 / Q_MAX 1.592679e+12
Tally is -32
TIMESTEP 11024 / EPSILON 0.9790544000000097 / ACTION 0 / REWARD 0 / Q_MAX 1.322037e+12
Tally is -32
TIMESTEP 11025 / EPSILON 0.9790525000000097 / ACTION 0 / REWARD 0 / Q_MAX 2.914252e+12
Tally is -32
TIMESTEP 11026 / EPSILON 0.9790506000000097 / ACTION 0 / REWARD 0 / Q_MAX 2.974446e+12
Tally is -32
TIMESTEP 11027 / EPSILON 0.9790487000000097 / ACTION 0 / REWARD 0 / Q_MAX 3.085043e+12
Tally is -32
TIMESTEP 11028 / EPSILON 0.9790468000000097 / ACTION 2 / REWARD 0 / Q_MAX 2.989398e+12
Tally is -32
TIMESTEP 11029 / EPSILON 0.9790449000000097 / ACTION 1 / REWARD 0 / Q_MAX 6.567245e+11
Tally is -32
TIMESTEP 11030 / EPSILON 0.9790430000000097 / ACTION 0 / REWARD 0 / Q_MAX 2.466934e+12
Tally is -32
TIMESTEP 11031 / EPSILON 0.9790411000000097 / ACTION 0 / REWARD 0 / Q_MAX 1.847584e+12


TIMESTEP 11126 / EPSILON 0.9788606000000097 / ACTION 0 / REWARD 0 / Q_MAX 1.145062e+12
Tally is -33
TIMESTEP 11127 / EPSILON 0.9788587000000097 / ACTION 0 / REWARD 0 / Q_MAX 9.460336e+11
Tally is -33
TIMESTEP 11128 / EPSILON 0.9788568000000097 / ACTION 1 / REWARD 0 / Q_MAX -5.967041e+11
Tally is -33
TIMESTEP 11129 / EPSILON 0.9788549000000097 / ACTION 1 / REWARD 0 / Q_MAX -3.585170e+11
Tally is -33
TIMESTEP 11130 / EPSILON 0.9788530000000097 / ACTION 0 / REWARD 0 / Q_MAX 1.254400e+12
Tally is -33
TIMESTEP 11131 / EPSILON 0.9788511000000097 / ACTION 1 / REWARD 0 / Q_MAX 1.840436e+12
Tally is -33
TIMESTEP 11132 / EPSILON 0.9788492000000097 / ACTION 0 / REWARD 0 / Q_MAX 1.324905e+11
Tally is -33
TIMESTEP 11133 / EPSILON 0.9788473000000097 / ACTION 0 / REWARD 0 / Q_MAX -2.141917e+11
Tally is -33
TIMESTEP 11134 / EPSILON 0.9788454000000097 / ACTION 0 / REWARD 0 / Q_MAX -2.301625e+11
Tally is -33
TIMESTEP 11135 / EPSILON 0.9788435000000097 / ACTION 1 / REWARD 0 / Q_MAX 8.583034e+10
Tally is 

TIMESTEP 11212 / EPSILON 0.9786972000000098 / ACTION 1 / REWARD 0 / Q_MAX 2.104951e+12
Tally is -33
TIMESTEP 11213 / EPSILON 0.9786953000000098 / ACTION 2 / REWARD 0 / Q_MAX 1.611264e+12
Tally is -33
TIMESTEP 11214 / EPSILON 0.9786934000000098 / ACTION 2 / REWARD 0 / Q_MAX 2.394499e+12
Tally is -33
TIMESTEP 11215 / EPSILON 0.9786915000000098 / ACTION 0 / REWARD 0 / Q_MAX 2.279098e+12
Tally is -33
TIMESTEP 11216 / EPSILON 0.9786896000000098 / ACTION 2 / REWARD 0 / Q_MAX 1.554445e+12
Tally is -33
TIMESTEP 11217 / EPSILON 0.9786877000000098 / ACTION 2 / REWARD 0 / Q_MAX 1.971650e+12
Tally is -33
TIMESTEP 11218 / EPSILON 0.9786858000000098 / ACTION 0 / REWARD 0 / Q_MAX 2.503232e+12
Tally is -33
TIMESTEP 11219 / EPSILON 0.9786839000000098 / ACTION 0 / REWARD 0 / Q_MAX 2.407737e+12
Tally is -33
TIMESTEP 11220 / EPSILON 0.9786820000000098 / ACTION 2 / REWARD 0 / Q_MAX 1.405217e+12
Tally is -33
TIMESTEP 11221 / EPSILON 0.9786801000000098 / ACTION 2 / REWARD 0 / Q_MAX 1.841018e+12
Tally is -33


TIMESTEP 11331 / EPSILON 0.9784711000000099 / ACTION 2 / REWARD 0 / Q_MAX 3.338119e+12
Tally is -33
TIMESTEP 11332 / EPSILON 0.9784692000000099 / ACTION 1 / REWARD 0 / Q_MAX 2.884092e+12
Tally is -33
TIMESTEP 11333 / EPSILON 0.9784673000000099 / ACTION 0 / REWARD 0 / Q_MAX 3.870138e+12
Tally is -33
TIMESTEP 11334 / EPSILON 0.9784654000000099 / ACTION 2 / REWARD 0 / Q_MAX 2.673142e+12
Tally is -33
TIMESTEP 11335 / EPSILON 0.9784635000000099 / ACTION 0 / REWARD 0 / Q_MAX 2.375277e+12
Tally is -33
TIMESTEP 11336 / EPSILON 0.9784616000000099 / ACTION 2 / REWARD 0 / Q_MAX 2.745653e+12
Tally is -33
TIMESTEP 11337 / EPSILON 0.9784597000000099 / ACTION 1 / REWARD 0 / Q_MAX 1.872065e+12
Tally is -33
TIMESTEP 11338 / EPSILON 0.9784578000000099 / ACTION 2 / REWARD 0 / Q_MAX 3.460297e+12
Tally is -33
TIMESTEP 11339 / EPSILON 0.9784559000000099 / ACTION 1 / REWARD 0 / Q_MAX 1.785261e+12
Tally is -33
TIMESTEP 11340 / EPSILON 0.9784540000000099 / ACTION 0 / REWARD 0 / Q_MAX 2.510601e+12
Tally is -33


Tally is -34
TIMESTEP 11446 / EPSILON 0.97825260000001 / ACTION 1 / REWARD 0 / Q_MAX 4.254601e+11
Tally is -34
TIMESTEP 11447 / EPSILON 0.97825070000001 / ACTION 2 / REWARD 0 / Q_MAX 3.904726e+11
Tally is -34
TIMESTEP 11448 / EPSILON 0.97824880000001 / ACTION 0 / REWARD 0 / Q_MAX 3.126179e+11
Tally is -34
TIMESTEP 11449 / EPSILON 0.97824690000001 / ACTION 1 / REWARD 0 / Q_MAX 9.706448e+10
Tally is -34
TIMESTEP 11450 / EPSILON 0.97824500000001 / ACTION 0 / REWARD 0 / Q_MAX 1.149776e+11
Tally is -34
TIMESTEP 11451 / EPSILON 0.97824310000001 / ACTION 0 / REWARD 0 / Q_MAX 6.861816e+11
Tally is -34
TIMESTEP 11452 / EPSILON 0.97824120000001 / ACTION 1 / REWARD 0 / Q_MAX 2.332843e+11
Tally is -34
TIMESTEP 11453 / EPSILON 0.97823930000001 / ACTION 1 / REWARD 0 / Q_MAX 6.465805e+11
Tally is -34
TIMESTEP 11454 / EPSILON 0.97823740000001 / ACTION 0 / REWARD 0 / Q_MAX 1.843566e+12
Tally is -34
TIMESTEP 11455 / EPSILON 0.97823550000001 / ACTION 1 / REWARD 0 / Q_MAX 1.960268e+12
Tally is -34
TIMESTE

TIMESTEP 11565 / EPSILON 0.9780265000000101 / ACTION 1 / REWARD 0 / Q_MAX 4.640020e+12
Tally is -34
TIMESTEP 11566 / EPSILON 0.9780246000000101 / ACTION 0 / REWARD 0 / Q_MAX 2.503713e+12
Tally is -34
TIMESTEP 11567 / EPSILON 0.9780227000000101 / ACTION 0 / REWARD 0 / Q_MAX 4.811124e+12
Tally is -34
TIMESTEP 11568 / EPSILON 0.9780208000000101 / ACTION 2 / REWARD 0 / Q_MAX 3.773997e+12
Tally is -34
TIMESTEP 11569 / EPSILON 0.9780189000000101 / ACTION 0 / REWARD 0 / Q_MAX 2.377426e+12
Tally is -34
TIMESTEP 11570 / EPSILON 0.9780170000000101 / ACTION 1 / REWARD 0 / Q_MAX 4.818366e+12
Tally is -34
TIMESTEP 11571 / EPSILON 0.9780151000000101 / ACTION 2 / REWARD 0 / Q_MAX 4.767434e+12
Tally is -34
TIMESTEP 11572 / EPSILON 0.9780132000000101 / ACTION 0 / REWARD 0 / Q_MAX 3.093676e+12
Tally is -34
TIMESTEP 11573 / EPSILON 0.9780113000000101 / ACTION 0 / REWARD 0 / Q_MAX 4.067407e+12
Tally is -34
TIMESTEP 11574 / EPSILON 0.9780094000000101 / ACTION 2 / REWARD 0 / Q_MAX 4.110773e+12
Tally is -34


TIMESTEP 11681 / EPSILON 0.9778061000000102 / ACTION 0 / REWARD 0 / Q_MAX 1.537261e+12
Tally is -34
TIMESTEP 11682 / EPSILON 0.9778042000000102 / ACTION 1 / REWARD 0 / Q_MAX 1.116810e+12
Tally is -34
TIMESTEP 11683 / EPSILON 0.9778023000000102 / ACTION 0 / REWARD 0 / Q_MAX 2.692496e+12
Tally is -34
TIMESTEP 11684 / EPSILON 0.9778004000000102 / ACTION 0 / REWARD 0 / Q_MAX 2.393412e+12
Tally is -34
TIMESTEP 11685 / EPSILON 0.9777985000000102 / ACTION 1 / REWARD 0 / Q_MAX 2.437014e+12
Tally is -34
TIMESTEP 11686 / EPSILON 0.9777966000000102 / ACTION 1 / REWARD 0 / Q_MAX 3.437259e+12
Tally is -34
TIMESTEP 11687 / EPSILON 0.9777947000000102 / ACTION 0 / REWARD 0 / Q_MAX 2.801074e+12
Tally is -34
TIMESTEP 11688 / EPSILON 0.9777928000000102 / ACTION 1 / REWARD 0 / Q_MAX 4.974168e+12
Tally is -34
TIMESTEP 11689 / EPSILON 0.9777909000000102 / ACTION 0 / REWARD 0 / Q_MAX 3.426530e+12
Tally is -34
TIMESTEP 11690 / EPSILON 0.9777890000000102 / ACTION 1 / REWARD 0 / Q_MAX 3.863032e+12
Tally is -34


TIMESTEP 11794 / EPSILON 0.9775914000000103 / ACTION 2 / REWARD 0 / Q_MAX 1.768176e+12
Tally is -34
TIMESTEP 11795 / EPSILON 0.9775895000000103 / ACTION 0 / REWARD 0 / Q_MAX 1.300097e+12
Tally is -34
TIMESTEP 11796 / EPSILON 0.9775876000000103 / ACTION 0 / REWARD 0 / Q_MAX 6.849968e+11
Tally is -34
TIMESTEP 11797 / EPSILON 0.9775857000000103 / ACTION 2 / REWARD 0 / Q_MAX 1.755360e+12
Tally is -34
TIMESTEP 11798 / EPSILON 0.9775838000000103 / ACTION 0 / REWARD 0 / Q_MAX 2.163530e+12
Tally is -34
TIMESTEP 11799 / EPSILON 0.9775819000000103 / ACTION 2 / REWARD 0 / Q_MAX 3.063453e+12
Tally is -34
TIMESTEP 11800 / EPSILON 0.9775800000000103 / ACTION 0 / REWARD 0 / Q_MAX 3.029161e+12
Tally is -34
TIMESTEP 11801 / EPSILON 0.9775781000000103 / ACTION 2 / REWARD 0 / Q_MAX 3.086266e+12
Tally is -34
TIMESTEP 11802 / EPSILON 0.9775762000000103 / ACTION 1 / REWARD 0 / Q_MAX 2.387492e+12
Tally is -34
TIMESTEP 11803 / EPSILON 0.9775743000000103 / ACTION 2 / REWARD 0 / Q_MAX 1.047250e+12
Tally is -34


TIMESTEP 11904 / EPSILON 0.9773824000000104 / ACTION 1 / REWARD 0 / Q_MAX -3.630329e+11
Tally is -35
TIMESTEP 11905 / EPSILON 0.9773805000000104 / ACTION 2 / REWARD 0 / Q_MAX 4.565060e+11
Tally is -35
TIMESTEP 11906 / EPSILON 0.9773786000000104 / ACTION 1 / REWARD 0 / Q_MAX 8.295711e+11
Tally is -35
TIMESTEP 11907 / EPSILON 0.9773767000000104 / ACTION 0 / REWARD 0 / Q_MAX -2.272654e+11
Tally is -35
TIMESTEP 11908 / EPSILON 0.9773748000000104 / ACTION 1 / REWARD 0 / Q_MAX 2.034382e+12
Tally is -35
TIMESTEP 11909 / EPSILON 0.9773729000000104 / ACTION 0 / REWARD 0 / Q_MAX 5.336769e+11
Tally is -35
TIMESTEP 11910 / EPSILON 0.9773710000000104 / ACTION 2 / REWARD 0 / Q_MAX 5.433546e+12
Tally is -35
TIMESTEP 11911 / EPSILON 0.9773691000000104 / ACTION 1 / REWARD 0 / Q_MAX 3.885763e+12
Tally is -35
TIMESTEP 11912 / EPSILON 0.9773672000000104 / ACTION 0 / REWARD 0 / Q_MAX 1.608614e+12
Tally is -35
TIMESTEP 11913 / EPSILON 0.9773653000000104 / ACTION 1 / REWARD 0 / Q_MAX -6.320854e+11
Tally is -

TIMESTEP 11995 / EPSILON 0.9772095000000105 / ACTION 1 / REWARD 0 / Q_MAX 1.512236e+12
Tally is -35
TIMESTEP 11996 / EPSILON 0.9772076000000105 / ACTION 0 / REWARD 0 / Q_MAX 1.507274e+12
Tally is -35
TIMESTEP 11997 / EPSILON 0.9772057000000105 / ACTION 2 / REWARD 0 / Q_MAX 1.200315e+12
Tally is -35
TIMESTEP 11998 / EPSILON 0.9772038000000105 / ACTION 0 / REWARD 0 / Q_MAX 1.328902e+12
Tally is -35
TIMESTEP 11999 / EPSILON 0.9772019000000105 / ACTION 0 / REWARD 0 / Q_MAX 6.146216e+11
Tally is -35
TIMESTEP 12000 / EPSILON 0.9772000000000105 / ACTION 2 / REWARD 0 / Q_MAX 2.018941e+12
Tally is -35
TIMESTEP 12001 / EPSILON 0.9771981000000105 / ACTION 1 / REWARD 0 / Q_MAX 1.674494e+12
Tally is -35
TIMESTEP 12002 / EPSILON 0.9771962000000105 / ACTION 0 / REWARD 0 / Q_MAX 2.253585e+12
Tally is -35
TIMESTEP 12003 / EPSILON 0.9771943000000105 / ACTION 2 / REWARD 0 / Q_MAX 1.261742e+12
Tally is -35
TIMESTEP 12004 / EPSILON 0.9771924000000105 / ACTION 0 / REWARD 0 / Q_MAX 2.321078e+12
Tally is -35


Tally is -36
TIMESTEP 12105 / EPSILON 0.9770005000000106 / ACTION 2 / REWARD 0 / Q_MAX 1.427994e+12
Tally is -36
TIMESTEP 12106 / EPSILON 0.9769986000000106 / ACTION 2 / REWARD 0 / Q_MAX 1.817708e+12
Tally is -36
TIMESTEP 12107 / EPSILON 0.9769967000000106 / ACTION 1 / REWARD 0 / Q_MAX 2.297119e+12
Tally is -36
TIMESTEP 12108 / EPSILON 0.9769948000000106 / ACTION 2 / REWARD 0 / Q_MAX 2.427351e+12
Tally is -36
TIMESTEP 12109 / EPSILON 0.9769929000000106 / ACTION 0 / REWARD 0 / Q_MAX 2.150628e+12
Tally is -36
TIMESTEP 12110 / EPSILON 0.9769910000000106 / ACTION 2 / REWARD 0 / Q_MAX 2.021158e+12
Tally is -36
TIMESTEP 12111 / EPSILON 0.9769891000000106 / ACTION 1 / REWARD 0 / Q_MAX 1.658938e+12
Tally is -36
TIMESTEP 12112 / EPSILON 0.9769872000000106 / ACTION 0 / REWARD 0 / Q_MAX 1.913647e+12
Tally is -36
TIMESTEP 12113 / EPSILON 0.9769853000000106 / ACTION 0 / REWARD 0 / Q_MAX 3.335926e+12
Tally is -36
TIMESTEP 12114 / EPSILON 0.9769834000000106 / ACTION 0 / REWARD 0 / Q_MAX 2.982305e+12


Tally is -36
TIMESTEP 12218 / EPSILON 0.9767858000000107 / ACTION 2 / REWARD 0 / Q_MAX 6.987899e+11
Tally is -36
TIMESTEP 12219 / EPSILON 0.9767839000000107 / ACTION 2 / REWARD 0 / Q_MAX 5.643416e+11
Tally is -36
TIMESTEP 12220 / EPSILON 0.9767820000000107 / ACTION 2 / REWARD 0 / Q_MAX 9.471375e+11
Tally is -36
TIMESTEP 12221 / EPSILON 0.9767801000000107 / ACTION 0 / REWARD 0 / Q_MAX 9.877224e+11
Tally is -36
TIMESTEP 12222 / EPSILON 0.9767782000000107 / ACTION 0 / REWARD 0 / Q_MAX 7.954567e+11
Tally is -36
TIMESTEP 12223 / EPSILON 0.9767763000000107 / ACTION 0 / REWARD 0 / Q_MAX 1.411458e+12
Tally is -36
TIMESTEP 12224 / EPSILON 0.9767744000000107 / ACTION 0 / REWARD 0 / Q_MAX 9.529287e+11
Tally is -36
TIMESTEP 12225 / EPSILON 0.9767725000000107 / ACTION 1 / REWARD 0 / Q_MAX 1.275857e+12
Tally is -36
TIMESTEP 12226 / EPSILON 0.9767706000000107 / ACTION 1 / REWARD 0 / Q_MAX 1.053413e+12
Tally is -36
TIMESTEP 12227 / EPSILON 0.9767687000000107 / ACTION 0 / REWARD 0 / Q_MAX 8.658152e+11


TIMESTEP 12333 / EPSILON 0.9765673000000108 / ACTION 0 / REWARD 0 / Q_MAX 3.680709e+12
Tally is -36
TIMESTEP 12334 / EPSILON 0.9765654000000108 / ACTION 2 / REWARD 0 / Q_MAX 4.841064e+12
Tally is -36
TIMESTEP 12335 / EPSILON 0.9765635000000108 / ACTION 1 / REWARD 0 / Q_MAX 1.961113e+12
Tally is -36
TIMESTEP 12336 / EPSILON 0.9765616000000108 / ACTION 2 / REWARD 0 / Q_MAX 1.969985e+12
Tally is -36
TIMESTEP 12337 / EPSILON 0.9765597000000108 / ACTION 2 / REWARD 0 / Q_MAX 3.730099e+12
Tally is -36
TIMESTEP 12338 / EPSILON 0.9765578000000108 / ACTION 1 / REWARD 0 / Q_MAX 2.047184e+12
Tally is -36
TIMESTEP 12339 / EPSILON 0.9765559000000108 / ACTION 1 / REWARD 0 / Q_MAX 3.937263e+12
Tally is -36
TIMESTEP 12340 / EPSILON 0.9765540000000108 / ACTION 2 / REWARD 0 / Q_MAX 2.004448e+12
Tally is -36
TIMESTEP 12341 / EPSILON 0.9765521000000108 / ACTION 0 / REWARD 0 / Q_MAX 1.502544e+12
Tally is -36
TIMESTEP 12342 / EPSILON 0.9765502000000108 / ACTION 0 / REWARD 0 / Q_MAX 2.436264e+12
Tally is -36


Tally is -37
TIMESTEP 12448 / EPSILON 0.9763488000000109 / ACTION 0 / REWARD 0 / Q_MAX 2.241379e+12
Tally is -37
TIMESTEP 12449 / EPSILON 0.9763469000000109 / ACTION 1 / REWARD 0 / Q_MAX 1.260205e+12
Tally is -37
TIMESTEP 12450 / EPSILON 0.9763450000000109 / ACTION 2 / REWARD 0 / Q_MAX 2.461352e+12
Tally is -37
TIMESTEP 12451 / EPSILON 0.9763431000000109 / ACTION 0 / REWARD 0 / Q_MAX 2.445040e+12
Tally is -37
TIMESTEP 12452 / EPSILON 0.9763412000000109 / ACTION 2 / REWARD 0 / Q_MAX 2.272466e+12
Tally is -37
TIMESTEP 12453 / EPSILON 0.9763393000000109 / ACTION 2 / REWARD 0 / Q_MAX 2.323062e+12
Tally is -37
TIMESTEP 12454 / EPSILON 0.9763374000000109 / ACTION 1 / REWARD 0 / Q_MAX 1.248341e+12
Tally is -37
TIMESTEP 12455 / EPSILON 0.9763355000000109 / ACTION 1 / REWARD 0 / Q_MAX 9.109720e+11
Tally is -37
TIMESTEP 12456 / EPSILON 0.9763336000000109 / ACTION 0 / REWARD 0 / Q_MAX 2.965794e+12
Tally is -37
TIMESTEP 12457 / EPSILON 0.9763317000000109 / ACTION 2 / REWARD 0 / Q_MAX 2.834542e+12


TIMESTEP 12561 / EPSILON 0.976134100000011 / ACTION 1 / REWARD 0 / Q_MAX 2.444240e+12
Tally is -37
TIMESTEP 12562 / EPSILON 0.976132200000011 / ACTION 0 / REWARD 0 / Q_MAX 4.092926e+12
Tally is -37
TIMESTEP 12563 / EPSILON 0.976130300000011 / ACTION 1 / REWARD 0 / Q_MAX 3.375239e+12
Tally is -37
TIMESTEP 12564 / EPSILON 0.976128400000011 / ACTION 1 / REWARD 0 / Q_MAX -5.339987e+11
Tally is -37
TIMESTEP 12565 / EPSILON 0.976126500000011 / ACTION 0 / REWARD 0 / Q_MAX 1.337817e+12
Tally is -37
TIMESTEP 12566 / EPSILON 0.976124600000011 / ACTION 1 / REWARD 0 / Q_MAX 1.378602e+12
Tally is -37
TIMESTEP 12567 / EPSILON 0.976122700000011 / ACTION 1 / REWARD 0 / Q_MAX 8.363312e+11
Tally is -37
TIMESTEP 12568 / EPSILON 0.976120800000011 / ACTION 2 / REWARD 0 / Q_MAX 9.924512e+11
Tally is -37
TIMESTEP 12569 / EPSILON 0.976118900000011 / ACTION 2 / REWARD 0 / Q_MAX 8.474023e+11
Tally is -37
TIMESTEP 12570 / EPSILON 0.976117000000011 / ACTION 0 / REWARD 0 / Q_MAX 1.647690e+11
Tally is -37
TIMESTEP 

TIMESTEP 12678 / EPSILON 0.9759118000000111 / ACTION 1 / REWARD 0 / Q_MAX 2.966266e+12
Tally is -38
TIMESTEP 12679 / EPSILON 0.9759099000000111 / ACTION 1 / REWARD 0 / Q_MAX 2.445692e+12
Tally is -38
TIMESTEP 12680 / EPSILON 0.9759080000000111 / ACTION 0 / REWARD 0 / Q_MAX 3.212762e+12
Tally is -38
TIMESTEP 12681 / EPSILON 0.9759061000000111 / ACTION 0 / REWARD 0 / Q_MAX 3.093579e+12
Tally is -38
TIMESTEP 12682 / EPSILON 0.9759042000000111 / ACTION 1 / REWARD 0 / Q_MAX 3.719529e+12
Tally is -38
TIMESTEP 12683 / EPSILON 0.9759023000000111 / ACTION 2 / REWARD 0 / Q_MAX 6.381000e+11
Tally is -38
TIMESTEP 12684 / EPSILON 0.9759004000000111 / ACTION 0 / REWARD 0 / Q_MAX 1.479318e+12
Tally is -38
TIMESTEP 12685 / EPSILON 0.9758985000000111 / ACTION 1 / REWARD 0 / Q_MAX -1.054972e+11
Tally is -38
TIMESTEP 12686 / EPSILON 0.9758966000000111 / ACTION 0 / REWARD 0 / Q_MAX 1.092582e+12
Tally is -38
TIMESTEP 12687 / EPSILON 0.9758947000000111 / ACTION 2 / REWARD 0 / Q_MAX -5.573863e+10
Tally is -3

Tally is -38
TIMESTEP 12786 / EPSILON 0.9757066000000112 / ACTION 1 / REWARD 0 / Q_MAX 1.576248e+12
Tally is -38
TIMESTEP 12787 / EPSILON 0.9757047000000112 / ACTION 2 / REWARD 0 / Q_MAX 7.924437e+11
Tally is -38
TIMESTEP 12788 / EPSILON 0.9757028000000112 / ACTION 0 / REWARD 0 / Q_MAX 2.708258e+11
Tally is -38
TIMESTEP 12789 / EPSILON 0.9757009000000112 / ACTION 1 / REWARD 0 / Q_MAX 9.554466e+11
Tally is -38
TIMESTEP 12790 / EPSILON 0.9756990000000112 / ACTION 0 / REWARD 0 / Q_MAX 1.286865e+12
Tally is -38
TIMESTEP 12791 / EPSILON 0.9756971000000112 / ACTION 2 / REWARD 0 / Q_MAX 1.194475e+12
Tally is -38
TIMESTEP 12792 / EPSILON 0.9756952000000112 / ACTION 2 / REWARD 0 / Q_MAX 1.558650e+12
Tally is -38
TIMESTEP 12793 / EPSILON 0.9756933000000112 / ACTION 1 / REWARD 0 / Q_MAX 1.521553e+12
Tally is -38
TIMESTEP 12794 / EPSILON 0.9756914000000112 / ACTION 2 / REWARD 0 / Q_MAX 1.391186e+12
Tally is -38
TIMESTEP 12795 / EPSILON 0.9756895000000112 / ACTION 1 / REWARD 0 / Q_MAX 1.838417e+12


Tally is -39
TIMESTEP 12897 / EPSILON 0.9754957000000113 / ACTION 1 / REWARD 0 / Q_MAX 1.708265e+12
Tally is -39
TIMESTEP 12898 / EPSILON 0.9754938000000113 / ACTION 2 / REWARD 0 / Q_MAX 2.204500e+12
Tally is -39
TIMESTEP 12899 / EPSILON 0.9754919000000113 / ACTION 0 / REWARD 0 / Q_MAX 4.189565e+12
Tally is -39
TIMESTEP 12900 / EPSILON 0.9754900000000113 / ACTION 0 / REWARD 0 / Q_MAX 2.519464e+12
Tally is -39
TIMESTEP 12901 / EPSILON 0.9754881000000113 / ACTION 2 / REWARD 0 / Q_MAX 2.921356e+12
Tally is -39
TIMESTEP 12902 / EPSILON 0.9754862000000113 / ACTION 1 / REWARD 0 / Q_MAX 4.948770e+12
Tally is -39
TIMESTEP 12903 / EPSILON 0.9754843000000113 / ACTION 0 / REWARD 0 / Q_MAX 2.956829e+12
Tally is -39
TIMESTEP 12904 / EPSILON 0.9754824000000113 / ACTION 2 / REWARD 0 / Q_MAX 3.583361e+12
Tally is -39
TIMESTEP 12905 / EPSILON 0.9754805000000113 / ACTION 1 / REWARD 0 / Q_MAX 3.292576e+12
Tally is -39
TIMESTEP 12906 / EPSILON 0.9754786000000113 / ACTION 0 / REWARD 0 / Q_MAX 2.458810e+12


Tally is -39
TIMESTEP 13011 / EPSILON 0.9752791000000114 / ACTION 0 / REWARD 0 / Q_MAX 1.232709e+12
Tally is -39
TIMESTEP 13012 / EPSILON 0.9752772000000114 / ACTION 2 / REWARD 0 / Q_MAX 1.753341e+12
Tally is -39
TIMESTEP 13013 / EPSILON 0.9752753000000114 / ACTION 2 / REWARD 0 / Q_MAX 1.250972e+12
Tally is -39
TIMESTEP 13014 / EPSILON 0.9752734000000114 / ACTION 2 / REWARD 0 / Q_MAX 1.105021e+12
Tally is -39
TIMESTEP 13015 / EPSILON 0.9752715000000114 / ACTION 2 / REWARD 0 / Q_MAX 1.420542e+12
Tally is -39
TIMESTEP 13016 / EPSILON 0.9752696000000114 / ACTION 0 / REWARD 0 / Q_MAX 9.779496e+11
Tally is -39
TIMESTEP 13017 / EPSILON 0.9752677000000114 / ACTION 2 / REWARD 0 / Q_MAX 1.398337e+12
Tally is -39
TIMESTEP 13018 / EPSILON 0.9752658000000114 / ACTION 1 / REWARD 0 / Q_MAX 1.111574e+12
Tally is -39
TIMESTEP 13019 / EPSILON 0.9752639000000114 / ACTION 2 / REWARD 0 / Q_MAX 1.346132e+12
Tally is -39
TIMESTEP 13020 / EPSILON 0.9752620000000114 / ACTION 1 / REWARD 0 / Q_MAX 1.365495e+12


Tally is -40
TIMESTEP 13126 / EPSILON 0.9750606000000115 / ACTION 0 / REWARD 0 / Q_MAX 2.195076e+12
Tally is -40
TIMESTEP 13127 / EPSILON 0.9750587000000115 / ACTION 1 / REWARD 0 / Q_MAX 1.543693e+12
Tally is -40
TIMESTEP 13128 / EPSILON 0.9750568000000115 / ACTION 1 / REWARD 0 / Q_MAX 2.887738e+12
Tally is -40
TIMESTEP 13129 / EPSILON 0.9750549000000115 / ACTION 0 / REWARD 0 / Q_MAX 2.915967e+12
Tally is -40
TIMESTEP 13130 / EPSILON 0.9750530000000115 / ACTION 2 / REWARD 0 / Q_MAX 2.788190e+12
Tally is -40
TIMESTEP 13131 / EPSILON 0.9750511000000115 / ACTION 0 / REWARD 0 / Q_MAX 2.566895e+12
Tally is -40
TIMESTEP 13132 / EPSILON 0.9750492000000115 / ACTION 1 / REWARD 0 / Q_MAX 2.659558e+12
Tally is -40
TIMESTEP 13133 / EPSILON 0.9750473000000115 / ACTION 0 / REWARD 0 / Q_MAX 2.652535e+12
Tally is -40
TIMESTEP 13134 / EPSILON 0.9750454000000115 / ACTION 1 / REWARD 0 / Q_MAX 2.050627e+12
Tally is -40
TIMESTEP 13135 / EPSILON 0.9750435000000115 / ACTION 2 / REWARD 0 / Q_MAX 1.576266e+12


TIMESTEP 13240 / EPSILON 0.9748440000000116 / ACTION 0 / REWARD 0 / Q_MAX 1.656770e+12
Tally is -40
TIMESTEP 13241 / EPSILON 0.9748421000000116 / ACTION 0 / REWARD 0 / Q_MAX 2.313995e+12
Tally is -40
TIMESTEP 13242 / EPSILON 0.9748402000000116 / ACTION 1 / REWARD 0 / Q_MAX 2.289197e+12
Tally is -40
TIMESTEP 13243 / EPSILON 0.9748383000000116 / ACTION 2 / REWARD 0 / Q_MAX 2.812767e+12
Tally is -40
TIMESTEP 13244 / EPSILON 0.9748364000000116 / ACTION 0 / REWARD 0 / Q_MAX 2.123504e+12
Tally is -40
TIMESTEP 13245 / EPSILON 0.9748345000000116 / ACTION 0 / REWARD 0 / Q_MAX 2.460432e+12
Tally is -40
TIMESTEP 13246 / EPSILON 0.9748326000000116 / ACTION 1 / REWARD 0 / Q_MAX 2.609973e+12
Tally is -40
TIMESTEP 13247 / EPSILON 0.9748307000000116 / ACTION 0 / REWARD 0 / Q_MAX 2.517351e+12
Tally is -40
TIMESTEP 13248 / EPSILON 0.9748288000000116 / ACTION 1 / REWARD 0 / Q_MAX 2.638147e+12
Tally is -40
TIMESTEP 13249 / EPSILON 0.9748269000000116 / ACTION 1 / REWARD 0 / Q_MAX 2.657113e+12
Tally is -40


TIMESTEP 13355 / EPSILON 0.9746255000000117 / ACTION 0 / REWARD 0 / Q_MAX -1.613052e+12
Tally is -40
TIMESTEP 13356 / EPSILON 0.9746236000000117 / ACTION 0 / REWARD 0 / Q_MAX -9.909182e+11
Tally is -40
TIMESTEP 13357 / EPSILON 0.9746217000000117 / ACTION 2 / REWARD 0 / Q_MAX -4.311613e+11
Tally is -40
TIMESTEP 13358 / EPSILON 0.9746198000000117 / ACTION 1 / REWARD 0 / Q_MAX -1.636613e+12
Tally is -40
TIMESTEP 13359 / EPSILON 0.9746179000000117 / ACTION 0 / REWARD 0 / Q_MAX -9.639192e+11
Tally is -40
TIMESTEP 13360 / EPSILON 0.9746160000000117 / ACTION 0 / REWARD 0 / Q_MAX -7.131669e+11
Tally is -40
TIMESTEP 13361 / EPSILON 0.9746141000000117 / ACTION 0 / REWARD 0 / Q_MAX -1.331366e+12
Tally is -40
TIMESTEP 13362 / EPSILON 0.9746122000000117 / ACTION 2 / REWARD 0 / Q_MAX -9.731129e+10
Tally is -40
TIMESTEP 13363 / EPSILON 0.9746103000000117 / ACTION 1 / REWARD 0 / Q_MAX -1.858226e+10
Tally is -40
TIMESTEP 13364 / EPSILON 0.9746084000000117 / ACTION 0 / REWARD 0 / Q_MAX -1.090334e+10
Tal

TIMESTEP 13470 / EPSILON 0.9744070000000118 / ACTION 1 / REWARD 0 / Q_MAX -1.042133e+12
Tally is -41
TIMESTEP 13471 / EPSILON 0.9744051000000118 / ACTION 2 / REWARD 0 / Q_MAX -1.264055e+12
Tally is -41
TIMESTEP 13472 / EPSILON 0.9744032000000118 / ACTION 1 / REWARD 0 / Q_MAX 1.510905e+12
Tally is -41
TIMESTEP 13473 / EPSILON 0.9744013000000118 / ACTION 2 / REWARD 0 / Q_MAX -8.984200e+11
Tally is -41
TIMESTEP 13474 / EPSILON 0.9743994000000118 / ACTION 0 / REWARD 0 / Q_MAX -1.661296e+12
Tally is -41
TIMESTEP 13475 / EPSILON 0.9743975000000118 / ACTION 1 / REWARD 0 / Q_MAX -2.478771e+12
Tally is -41
TIMESTEP 13476 / EPSILON 0.9743956000000118 / ACTION 1 / REWARD 0 / Q_MAX -2.040794e+12
Tally is -41
TIMESTEP 13477 / EPSILON 0.9743937000000118 / ACTION 0 / REWARD 0 / Q_MAX -1.988241e+12
Tally is -41
TIMESTEP 13478 / EPSILON 0.9743918000000118 / ACTION 2 / REWARD 0 / Q_MAX 4.613079e+11
Tally is -41
TIMESTEP 13479 / EPSILON 0.9743899000000118 / ACTION 2 / REWARD 0 / Q_MAX 5.459505e+11
Tally 

Tally is -41
TIMESTEP 13586 / EPSILON 0.9741866000000119 / ACTION 0 / REWARD 0 / Q_MAX 8.474302e+11
Tally is -41
TIMESTEP 13587 / EPSILON 0.9741847000000119 / ACTION 1 / REWARD 0 / Q_MAX 1.341834e+12
Tally is -41
TIMESTEP 13588 / EPSILON 0.9741828000000119 / ACTION 2 / REWARD 0 / Q_MAX 1.011177e+12
Tally is -41
TIMESTEP 13589 / EPSILON 0.9741809000000119 / ACTION 1 / REWARD 0 / Q_MAX 7.953457e+11
Tally is -41
TIMESTEP 13590 / EPSILON 0.9741790000000119 / ACTION 2 / REWARD 0 / Q_MAX 5.882648e+11
Tally is -41
TIMESTEP 13591 / EPSILON 0.9741771000000119 / ACTION 2 / REWARD 0 / Q_MAX 6.457139e+11
Tally is -41
TIMESTEP 13592 / EPSILON 0.9741752000000119 / ACTION 2 / REWARD 0 / Q_MAX 5.838346e+11
Tally is -41
TIMESTEP 13593 / EPSILON 0.9741733000000119 / ACTION 2 / REWARD 0 / Q_MAX 3.582886e+11
Tally is -41
TIMESTEP 13594 / EPSILON 0.9741714000000119 / ACTION 1 / REWARD 0 / Q_MAX 1.000485e+12
Tally is -41
TIMESTEP 13595 / EPSILON 0.9741695000000119 / ACTION 0 / REWARD 0 / Q_MAX 1.075982e+12


Tally is -41
TIMESTEP 13700 / EPSILON 0.973970000000012 / ACTION 1 / REWARD 0 / Q_MAX 1.313261e+12
Tally is -41
TIMESTEP 13701 / EPSILON 0.973968100000012 / ACTION 0 / REWARD 0 / Q_MAX 6.253346e+11
Tally is -41
TIMESTEP 13702 / EPSILON 0.973966200000012 / ACTION 1 / REWARD 0 / Q_MAX -1.357223e+12
Tally is -41
TIMESTEP 13703 / EPSILON 0.973964300000012 / ACTION 0 / REWARD 0 / Q_MAX -1.010108e+12
Tally is -41
TIMESTEP 13704 / EPSILON 0.973962400000012 / ACTION 2 / REWARD 0 / Q_MAX -9.498648e+11
Tally is -41
TIMESTEP 13705 / EPSILON 0.973960500000012 / ACTION 1 / REWARD 0 / Q_MAX -8.716689e+11
Tally is -41
TIMESTEP 13706 / EPSILON 0.973958600000012 / ACTION 2 / REWARD 0 / Q_MAX -3.747928e+11
Tally is -41
TIMESTEP 13707 / EPSILON 0.973956700000012 / ACTION 0 / REWARD 0 / Q_MAX -3.165807e+12
Tally is -41
TIMESTEP 13708 / EPSILON 0.973954800000012 / ACTION 1 / REWARD 0 / Q_MAX -2.217969e+12
Tally is -41
TIMESTEP 13709 / EPSILON 0.973952900000012 / ACTION 1 / REWARD 0 / Q_MAX -1.176371e+12
Ta

TIMESTEP 13814 / EPSILON 0.9737534000000121 / ACTION 2 / REWARD 0 / Q_MAX 2.940693e+12
Tally is -41
TIMESTEP 13815 / EPSILON 0.9737515000000121 / ACTION 0 / REWARD 0 / Q_MAX 2.487743e+12
Tally is -41
TIMESTEP 13816 / EPSILON 0.9737496000000121 / ACTION 0 / REWARD 0 / Q_MAX 1.784185e+12
Tally is -41
TIMESTEP 13817 / EPSILON 0.9737477000000121 / ACTION 2 / REWARD 0 / Q_MAX 1.905534e+12
Tally is -41
TIMESTEP 13818 / EPSILON 0.9737458000000121 / ACTION 1 / REWARD 0 / Q_MAX 2.039582e+12
Tally is -41
TIMESTEP 13819 / EPSILON 0.9737439000000121 / ACTION 1 / REWARD 0 / Q_MAX 3.569553e+12
Tally is -41
TIMESTEP 13820 / EPSILON 0.9737420000000121 / ACTION 1 / REWARD 0 / Q_MAX 3.083597e+12
Tally is -41
TIMESTEP 13821 / EPSILON 0.9737401000000121 / ACTION 2 / REWARD 0 / Q_MAX 1.961316e+12
Tally is -41
TIMESTEP 13822 / EPSILON 0.9737382000000121 / ACTION 2 / REWARD 0 / Q_MAX 1.121160e+12
Tally is -41
TIMESTEP 13823 / EPSILON 0.9737363000000121 / ACTION 0 / REWARD 0 / Q_MAX 1.289219e+12
Tally is -41


Tally is -42
TIMESTEP 13927 / EPSILON 0.9735387000000122 / ACTION 1 / REWARD 0 / Q_MAX 3.782962e+12
Tally is -42
TIMESTEP 13928 / EPSILON 0.9735368000000122 / ACTION 0 / REWARD 0 / Q_MAX 3.140125e+12
Tally is -42
TIMESTEP 13929 / EPSILON 0.9735349000000122 / ACTION 1 / REWARD 0 / Q_MAX 1.954069e+12
Tally is -42
TIMESTEP 13930 / EPSILON 0.9735330000000122 / ACTION 0 / REWARD 0 / Q_MAX 3.301044e+12
Tally is -42
TIMESTEP 13931 / EPSILON 0.9735311000000122 / ACTION 0 / REWARD 0 / Q_MAX 1.973917e+12
Tally is -42
TIMESTEP 13932 / EPSILON 0.9735292000000122 / ACTION 2 / REWARD 0 / Q_MAX 1.007547e+12
Tally is -42
TIMESTEP 13933 / EPSILON 0.9735273000000122 / ACTION 2 / REWARD 0 / Q_MAX 3.118526e+11
Tally is -42
TIMESTEP 13934 / EPSILON 0.9735254000000122 / ACTION 2 / REWARD 0 / Q_MAX 1.014442e+12
Tally is -42
TIMESTEP 13935 / EPSILON 0.9735235000000122 / ACTION 1 / REWARD 0 / Q_MAX 2.502117e+12
Tally is -42
TIMESTEP 13936 / EPSILON 0.9735216000000122 / ACTION 1 / REWARD 0 / Q_MAX 2.708246e+12


Tally is -42
TIMESTEP 14037 / EPSILON 0.9733297000000123 / ACTION 1 / REWARD 0 / Q_MAX 6.524156e+11
Tally is -42
TIMESTEP 14038 / EPSILON 0.9733278000000123 / ACTION 2 / REWARD 0 / Q_MAX 1.345777e+12
Tally is -42
TIMESTEP 14039 / EPSILON 0.9733259000000123 / ACTION 1 / REWARD 0 / Q_MAX 1.175868e+12
Tally is -42
TIMESTEP 14040 / EPSILON 0.9733240000000123 / ACTION 1 / REWARD 0 / Q_MAX 2.160284e+11
Tally is -42
TIMESTEP 14041 / EPSILON 0.9733221000000123 / ACTION 1 / REWARD 0 / Q_MAX -1.125954e+12
Tally is -42
TIMESTEP 14042 / EPSILON 0.9733202000000123 / ACTION 2 / REWARD 0 / Q_MAX 1.877138e+12
Tally is -42
TIMESTEP 14043 / EPSILON 0.9733183000000123 / ACTION 0 / REWARD 0 / Q_MAX 2.748689e+12
Tally is -42
TIMESTEP 14044 / EPSILON 0.9733164000000123 / ACTION 2 / REWARD 0 / Q_MAX 1.909423e+12
Tally is -42
TIMESTEP 14045 / EPSILON 0.9733145000000123 / ACTION 1 / REWARD 0 / Q_MAX 1.808416e+12
Tally is -42
TIMESTEP 14046 / EPSILON 0.9733126000000123 / ACTION 0 / REWARD 0 / Q_MAX 1.189780e+12

Tally is -43
TIMESTEP 14140 / EPSILON 0.9731340000000124 / ACTION 2 / REWARD 0 / Q_MAX 2.907596e+11
Tally is -43
TIMESTEP 14141 / EPSILON 0.9731321000000124 / ACTION 1 / REWARD 0 / Q_MAX -3.463466e+11
Tally is -43
TIMESTEP 14142 / EPSILON 0.9731302000000124 / ACTION 2 / REWARD 0 / Q_MAX -5.411790e+11
Tally is -43
TIMESTEP 14143 / EPSILON 0.9731283000000124 / ACTION 0 / REWARD 0 / Q_MAX -1.659132e+11
Tally is -43
TIMESTEP 14144 / EPSILON 0.9731264000000124 / ACTION 2 / REWARD 0 / Q_MAX -5.217053e+11
Tally is -43
TIMESTEP 14145 / EPSILON 0.9731245000000124 / ACTION 2 / REWARD 0 / Q_MAX 1.296173e+12
Tally is -43
TIMESTEP 14146 / EPSILON 0.9731226000000124 / ACTION 2 / REWARD 0 / Q_MAX -3.461885e+11
Tally is -43
TIMESTEP 14147 / EPSILON 0.9731207000000124 / ACTION 0 / REWARD 0 / Q_MAX 5.640763e+11
Tally is -43
TIMESTEP 14148 / EPSILON 0.9731188000000124 / ACTION 1 / REWARD 0 / Q_MAX 6.540472e+11
Tally is -43
TIMESTEP 14149 / EPSILON 0.9731169000000124 / ACTION 0 / REWARD 0 / Q_MAX 1.072643

Tally is -43
TIMESTEP 14253 / EPSILON 0.9729193000000125 / ACTION 0 / REWARD 0 / Q_MAX -1.726701e+11
Tally is -43
TIMESTEP 14254 / EPSILON 0.9729174000000125 / ACTION 2 / REWARD 0 / Q_MAX -8.257549e+11
Tally is -43
TIMESTEP 14255 / EPSILON 0.9729155000000125 / ACTION 1 / REWARD 0 / Q_MAX -1.300304e+12
Tally is -43
TIMESTEP 14256 / EPSILON 0.9729136000000125 / ACTION 1 / REWARD 0 / Q_MAX -4.558146e+11
Tally is -43
TIMESTEP 14257 / EPSILON 0.9729117000000125 / ACTION 0 / REWARD 0 / Q_MAX -2.917212e+11
Tally is -43
TIMESTEP 14258 / EPSILON 0.9729098000000125 / ACTION 2 / REWARD 0 / Q_MAX 4.973600e+11
Tally is -43
TIMESTEP 14259 / EPSILON 0.9729079000000125 / ACTION 0 / REWARD 0 / Q_MAX 5.192436e+11
Tally is -43
TIMESTEP 14260 / EPSILON 0.9729060000000125 / ACTION 1 / REWARD 0 / Q_MAX 1.582639e+12
Tally is -43
TIMESTEP 14261 / EPSILON 0.9729041000000125 / ACTION 0 / REWARD 0 / Q_MAX 1.007168e+12
Tally is -43
TIMESTEP 14262 / EPSILON 0.9729022000000125 / ACTION 1 / REWARD 0 / Q_MAX 9.154408

Tally is -43
TIMESTEP 14360 / EPSILON 0.9727160000000126 / ACTION 1 / REWARD 0 / Q_MAX 1.172653e+12
Tally is -43
TIMESTEP 14361 / EPSILON 0.9727141000000126 / ACTION 0 / REWARD 0 / Q_MAX 1.116928e+12
Tally is -43
TIMESTEP 14362 / EPSILON 0.9727122000000126 / ACTION 1 / REWARD 0 / Q_MAX 1.606053e+12
Tally is -43
TIMESTEP 14363 / EPSILON 0.9727103000000126 / ACTION 0 / REWARD 0 / Q_MAX 1.092527e+12
Tally is -43
TIMESTEP 14364 / EPSILON 0.9727084000000126 / ACTION 2 / REWARD 0 / Q_MAX 1.024257e+12
Tally is -43
TIMESTEP 14365 / EPSILON 0.9727065000000126 / ACTION 1 / REWARD 0 / Q_MAX 1.308238e+12
Tally is -43
TIMESTEP 14366 / EPSILON 0.9727046000000126 / ACTION 2 / REWARD 0 / Q_MAX 9.727607e+11
Tally is -43
TIMESTEP 14367 / EPSILON 0.9727027000000126 / ACTION 0 / REWARD 0 / Q_MAX 1.514272e+12
Tally is -43
TIMESTEP 14368 / EPSILON 0.9727008000000126 / ACTION 1 / REWARD 0 / Q_MAX 1.356227e+12
Tally is -43
TIMESTEP 14369 / EPSILON 0.9726989000000126 / ACTION 2 / REWARD 0 / Q_MAX 9.320950e+11


TIMESTEP 14474 / EPSILON 0.9724994000000127 / ACTION 2 / REWARD 0 / Q_MAX 2.108421e+12
Tally is -43
TIMESTEP 14475 / EPSILON 0.9724975000000127 / ACTION 0 / REWARD 0 / Q_MAX 2.274820e+12
Tally is -43
TIMESTEP 14476 / EPSILON 0.9724956000000127 / ACTION 2 / REWARD 0 / Q_MAX 8.427854e+11
Tally is -43
TIMESTEP 14477 / EPSILON 0.9724937000000127 / ACTION 0 / REWARD 0 / Q_MAX -3.952898e+11
Tally is -43
TIMESTEP 14478 / EPSILON 0.9724918000000127 / ACTION 1 / REWARD 0 / Q_MAX -6.618667e+11
Tally is -43
TIMESTEP 14479 / EPSILON 0.9724899000000127 / ACTION 1 / REWARD 0 / Q_MAX -1.794964e+11
Tally is -43
TIMESTEP 14480 / EPSILON 0.9724880000000127 / ACTION 0 / REWARD 0 / Q_MAX -4.766710e+11
Tally is -43
TIMESTEP 14481 / EPSILON 0.9724861000000127 / ACTION 2 / REWARD 0 / Q_MAX -2.530133e+11
Tally is -43
TIMESTEP 14482 / EPSILON 0.9724842000000127 / ACTION 0 / REWARD 0 / Q_MAX -1.272615e+12
Tally is -43
TIMESTEP 14483 / EPSILON 0.9724823000000127 / ACTION 2 / REWARD 0 / Q_MAX -2.594498e+11
Tally 

TIMESTEP 14589 / EPSILON 0.9722809000000128 / ACTION 1 / REWARD 0 / Q_MAX 1.768123e+12
Tally is -43
TIMESTEP 14590 / EPSILON 0.9722790000000128 / ACTION 2 / REWARD 0 / Q_MAX 1.173083e+12
Tally is -43
TIMESTEP 14591 / EPSILON 0.9722771000000128 / ACTION 2 / REWARD 0 / Q_MAX 8.517848e+11
Tally is -43
TIMESTEP 14592 / EPSILON 0.9722752000000128 / ACTION 2 / REWARD 0 / Q_MAX 9.844440e+11
Tally is -43
TIMESTEP 14593 / EPSILON 0.9722733000000128 / ACTION 1 / REWARD 0 / Q_MAX -8.627980e+10
Tally is -43
TIMESTEP 14594 / EPSILON 0.9722714000000128 / ACTION 1 / REWARD 0 / Q_MAX -5.107349e+11
Tally is -43
TIMESTEP 14595 / EPSILON 0.9722695000000128 / ACTION 2 / REWARD 0 / Q_MAX 1.302924e+11
Tally is -43
TIMESTEP 14596 / EPSILON 0.9722676000000128 / ACTION 0 / REWARD 0 / Q_MAX -7.126249e+10
Tally is -43
TIMESTEP 14597 / EPSILON 0.9722657000000128 / ACTION 1 / REWARD 0 / Q_MAX -6.671856e+11
Tally is -43
TIMESTEP 14598 / EPSILON 0.9722638000000128 / ACTION 1 / REWARD 0 / Q_MAX -4.987934e+11
Tally is

TIMESTEP 14698 / EPSILON 0.9720738000000129 / ACTION 2 / REWARD 0 / Q_MAX 6.474212e+11
Tally is -44
TIMESTEP 14699 / EPSILON 0.9720719000000129 / ACTION 2 / REWARD 0 / Q_MAX 1.476867e+12
Tally is -44
TIMESTEP 14700 / EPSILON 0.9720700000000129 / ACTION 0 / REWARD 0 / Q_MAX 3.266916e+12
Tally is -44
TIMESTEP 14701 / EPSILON 0.9720681000000129 / ACTION 2 / REWARD 0 / Q_MAX 2.518044e+12
Tally is -44
TIMESTEP 14702 / EPSILON 0.9720662000000129 / ACTION 2 / REWARD 0 / Q_MAX 1.469241e+12
Tally is -44
TIMESTEP 14703 / EPSILON 0.9720643000000129 / ACTION 2 / REWARD 0 / Q_MAX 2.233999e+12
Tally is -44
TIMESTEP 14704 / EPSILON 0.9720624000000129 / ACTION 1 / REWARD 0 / Q_MAX 1.864670e+12
Tally is -44
TIMESTEP 14705 / EPSILON 0.9720605000000129 / ACTION 1 / REWARD 0 / Q_MAX 1.769430e+12
Tally is -44
TIMESTEP 14706 / EPSILON 0.9720586000000129 / ACTION 2 / REWARD 0 / Q_MAX 1.524406e+12
Tally is -44
TIMESTEP 14707 / EPSILON 0.9720567000000129 / ACTION 1 / REWARD 0 / Q_MAX 2.194583e+12
Tally is -44


Tally is -44
TIMESTEP 14810 / EPSILON 0.971861000000013 / ACTION 1 / REWARD 0 / Q_MAX 9.963373e+11
Tally is -44
TIMESTEP 14811 / EPSILON 0.971859100000013 / ACTION 0 / REWARD 0 / Q_MAX 1.081114e+12
Tally is -44
TIMESTEP 14812 / EPSILON 0.971857200000013 / ACTION 0 / REWARD 0 / Q_MAX 4.438256e+11
Tally is -44
TIMESTEP 14813 / EPSILON 0.971855300000013 / ACTION 2 / REWARD 0 / Q_MAX 1.344142e+12
Tally is -44
TIMESTEP 14814 / EPSILON 0.971853400000013 / ACTION 1 / REWARD 0 / Q_MAX 1.974571e+12
Tally is -44
TIMESTEP 14815 / EPSILON 0.971851500000013 / ACTION 2 / REWARD 0 / Q_MAX 4.852888e+12
Tally is -44
TIMESTEP 14816 / EPSILON 0.971849600000013 / ACTION 1 / REWARD 0 / Q_MAX 1.669591e+12
Tally is -44
TIMESTEP 14817 / EPSILON 0.971847700000013 / ACTION 2 / REWARD 0 / Q_MAX 2.930616e+12
Tally is -44
TIMESTEP 14818 / EPSILON 0.971845800000013 / ACTION 0 / REWARD 0 / Q_MAX 4.311523e+12
Tally is -44
TIMESTEP 14819 / EPSILON 0.971843900000013 / ACTION 2 / REWARD 0 / Q_MAX 4.074441e+12
Tally is -

Tally is -45
TIMESTEP 14919 / EPSILON 0.9716539000000131 / ACTION 1 / REWARD 0 / Q_MAX 1.122673e+11
Tally is -45
TIMESTEP 14920 / EPSILON 0.9716520000000131 / ACTION 1 / REWARD 0 / Q_MAX 3.633936e+12
Tally is -45
TIMESTEP 14921 / EPSILON 0.9716501000000131 / ACTION 1 / REWARD 0 / Q_MAX 2.035092e+12
Tally is -45
TIMESTEP 14922 / EPSILON 0.9716482000000131 / ACTION 1 / REWARD 0 / Q_MAX 3.400907e+12
Tally is -45
TIMESTEP 14923 / EPSILON 0.9716463000000131 / ACTION 2 / REWARD 0 / Q_MAX 2.170393e+12
Tally is -45
TIMESTEP 14924 / EPSILON 0.9716444000000131 / ACTION 1 / REWARD 0 / Q_MAX 3.003567e+12
Tally is -45
TIMESTEP 14925 / EPSILON 0.9716425000000131 / ACTION 2 / REWARD 0 / Q_MAX 2.636758e+12
Tally is -45
TIMESTEP 14926 / EPSILON 0.9716406000000131 / ACTION 1 / REWARD 0 / Q_MAX 3.086321e+12
Tally is -45
TIMESTEP 14927 / EPSILON 0.9716387000000131 / ACTION 2 / REWARD 0 / Q_MAX 2.754460e+12
Tally is -45
TIMESTEP 14928 / EPSILON 0.9716368000000131 / ACTION 2 / REWARD 0 / Q_MAX 2.411054e+12


Tally is -45
TIMESTEP 15033 / EPSILON 0.9714373000000132 / ACTION 0 / REWARD 0 / Q_MAX -1.016103e+12
Tally is -45
TIMESTEP 15034 / EPSILON 0.9714354000000132 / ACTION 2 / REWARD 0 / Q_MAX -2.463101e+11
Tally is -45
TIMESTEP 15035 / EPSILON 0.9714335000000132 / ACTION 0 / REWARD 0 / Q_MAX -9.690050e+10
Tally is -45
TIMESTEP 15036 / EPSILON 0.9714316000000132 / ACTION 1 / REWARD 0 / Q_MAX -1.039203e+12
Tally is -45
TIMESTEP 15037 / EPSILON 0.9714297000000132 / ACTION 1 / REWARD 0 / Q_MAX -1.234712e+12
Tally is -45
TIMESTEP 15038 / EPSILON 0.9714278000000132 / ACTION 2 / REWARD 0 / Q_MAX -2.417816e+11
Tally is -45
TIMESTEP 15039 / EPSILON 0.9714259000000132 / ACTION 2 / REWARD 0 / Q_MAX 4.430812e+11
Tally is -45
TIMESTEP 15040 / EPSILON 0.9714240000000132 / ACTION 1 / REWARD 0 / Q_MAX -5.914506e+10
Tally is -45
TIMESTEP 15041 / EPSILON 0.9714221000000132 / ACTION 2 / REWARD 0 / Q_MAX 1.406961e+12
Tally is -45
TIMESTEP 15042 / EPSILON 0.9714202000000132 / ACTION 0 / REWARD 0 / Q_MAX 3.0785

Tally is -45
TIMESTEP 15146 / EPSILON 0.9712226000000133 / ACTION 1 / REWARD 0 / Q_MAX 1.213087e+12
Tally is -45
TIMESTEP 15147 / EPSILON 0.9712207000000133 / ACTION 0 / REWARD 0 / Q_MAX 1.463459e+12
Tally is -45
TIMESTEP 15148 / EPSILON 0.9712188000000133 / ACTION 1 / REWARD 0 / Q_MAX 8.844865e+11
Tally is -45
TIMESTEP 15149 / EPSILON 0.9712169000000133 / ACTION 1 / REWARD 0 / Q_MAX 1.180798e+12
Tally is -45
TIMESTEP 15150 / EPSILON 0.9712150000000133 / ACTION 1 / REWARD 0 / Q_MAX 1.357156e+12
Tally is -45
TIMESTEP 15151 / EPSILON 0.9712131000000133 / ACTION 1 / REWARD 0 / Q_MAX 1.618496e+12
Tally is -45
TIMESTEP 15152 / EPSILON 0.9712112000000133 / ACTION 0 / REWARD 0 / Q_MAX 1.907201e+12
Tally is -45
TIMESTEP 15153 / EPSILON 0.9712093000000133 / ACTION 1 / REWARD 0 / Q_MAX 1.161537e+12
Tally is -45
TIMESTEP 15154 / EPSILON 0.9712074000000133 / ACTION 2 / REWARD 0 / Q_MAX 5.930865e+11
Tally is -45
TIMESTEP 15155 / EPSILON 0.9712055000000133 / ACTION 2 / REWARD 0 / Q_MAX 1.030155e+12


TIMESTEP 15254 / EPSILON 0.9710174000000134 / ACTION 1 / REWARD 0 / Q_MAX 1.084548e+12
Tally is -45
TIMESTEP 15255 / EPSILON 0.9710155000000134 / ACTION 2 / REWARD 0 / Q_MAX 1.543858e+12
Tally is -45
TIMESTEP 15256 / EPSILON 0.9710136000000134 / ACTION 2 / REWARD 0 / Q_MAX 1.607229e+12
Tally is -45
TIMESTEP 15257 / EPSILON 0.9710117000000134 / ACTION 0 / REWARD 0 / Q_MAX 4.111232e+11
Tally is -45
TIMESTEP 15258 / EPSILON 0.9710098000000134 / ACTION 0 / REWARD 0 / Q_MAX 1.003798e+12
Tally is -45
TIMESTEP 15259 / EPSILON 0.9710079000000134 / ACTION 1 / REWARD 0 / Q_MAX -1.144175e+12
Tally is -45
TIMESTEP 15260 / EPSILON 0.9710060000000134 / ACTION 0 / REWARD 0 / Q_MAX -7.353797e+11
Tally is -45
TIMESTEP 15261 / EPSILON 0.9710041000000134 / ACTION 0 / REWARD 0 / Q_MAX -1.206476e+11
Tally is -45
TIMESTEP 15262 / EPSILON 0.9710022000000134 / ACTION 2 / REWARD 0 / Q_MAX -1.951398e+12
Tally is -45
TIMESTEP 15263 / EPSILON 0.9710003000000134 / ACTION 0 / REWARD 0 / Q_MAX -1.093439e+12
Tally is

TIMESTEP 15365 / EPSILON 0.9708065000000135 / ACTION 2 / REWARD 0 / Q_MAX 4.138879e+11
Tally is -45
TIMESTEP 15366 / EPSILON 0.9708046000000135 / ACTION 2 / REWARD 0 / Q_MAX -1.398527e+10
Tally is -46
TIMESTEP 15367 / EPSILON 0.9708027000000135 / ACTION 0 / REWARD -1 / Q_MAX -9.084859e+10
Tally is -46
TIMESTEP 15368 / EPSILON 0.9708008000000135 / ACTION 2 / REWARD 0 / Q_MAX -1.927046e+11
Tally is -46
TIMESTEP 15369 / EPSILON 0.9707989000000135 / ACTION 1 / REWARD 0 / Q_MAX -1.062486e+11
Tally is -46
TIMESTEP 15370 / EPSILON 0.9707970000000135 / ACTION 1 / REWARD 0 / Q_MAX -1.104880e+11
Tally is -46
TIMESTEP 15371 / EPSILON 0.9707951000000135 / ACTION 2 / REWARD 0 / Q_MAX 3.878251e+11
Tally is -46
TIMESTEP 15372 / EPSILON 0.9707932000000135 / ACTION 0 / REWARD 0 / Q_MAX 4.068308e+11
Tally is -46
TIMESTEP 15373 / EPSILON 0.9707913000000135 / ACTION 1 / REWARD 0 / Q_MAX -1.992500e+11
Tally is -46
TIMESTEP 15374 / EPSILON 0.9707894000000135 / ACTION 2 / REWARD 0 / Q_MAX -9.772869e+10
Tally

Tally is -46
TIMESTEP 15476 / EPSILON 0.9705956000000135 / ACTION 2 / REWARD 0 / Q_MAX 1.212480e+12
Tally is -46
TIMESTEP 15477 / EPSILON 0.9705937000000135 / ACTION 1 / REWARD 0 / Q_MAX 2.729223e+12
Tally is -46
TIMESTEP 15478 / EPSILON 0.9705918000000135 / ACTION 2 / REWARD 0 / Q_MAX 2.077992e+12
Tally is -46
TIMESTEP 15479 / EPSILON 0.9705899000000136 / ACTION 2 / REWARD 0 / Q_MAX 3.163492e+12
Tally is -46
TIMESTEP 15480 / EPSILON 0.9705880000000136 / ACTION 1 / REWARD 0 / Q_MAX 1.937370e+12
Tally is -46
TIMESTEP 15481 / EPSILON 0.9705861000000136 / ACTION 1 / REWARD 0 / Q_MAX 1.456038e+12
Tally is -46
TIMESTEP 15482 / EPSILON 0.9705842000000136 / ACTION 2 / REWARD 0 / Q_MAX 2.484670e+12
Tally is -46
TIMESTEP 15483 / EPSILON 0.9705823000000136 / ACTION 0 / REWARD 0 / Q_MAX 2.430398e+12
Tally is -46
TIMESTEP 15484 / EPSILON 0.9705804000000136 / ACTION 0 / REWARD 0 / Q_MAX 1.359099e+12
Tally is -46
TIMESTEP 15485 / EPSILON 0.9705785000000136 / ACTION 2 / REWARD 0 / Q_MAX 2.486965e+12


Tally is -46
TIMESTEP 15588 / EPSILON 0.9703828000000136 / ACTION 0 / REWARD 0 / Q_MAX 4.077010e+12
Tally is -46
TIMESTEP 15589 / EPSILON 0.9703809000000136 / ACTION 2 / REWARD 0 / Q_MAX 3.213360e+12
Tally is -46
TIMESTEP 15590 / EPSILON 0.9703790000000136 / ACTION 0 / REWARD 0 / Q_MAX 3.527880e+12
Tally is -46
TIMESTEP 15591 / EPSILON 0.9703771000000136 / ACTION 2 / REWARD 0 / Q_MAX 4.420122e+12
Tally is -46
TIMESTEP 15592 / EPSILON 0.9703752000000136 / ACTION 1 / REWARD 0 / Q_MAX 3.626745e+12
Tally is -46
TIMESTEP 15593 / EPSILON 0.9703733000000136 / ACTION 1 / REWARD 0 / Q_MAX 3.915635e+12
Tally is -46
TIMESTEP 15594 / EPSILON 0.9703714000000137 / ACTION 0 / REWARD 0 / Q_MAX 2.837606e+12
Tally is -46
TIMESTEP 15595 / EPSILON 0.9703695000000137 / ACTION 2 / REWARD 0 / Q_MAX 2.440175e+12
Tally is -46
TIMESTEP 15596 / EPSILON 0.9703676000000137 / ACTION 1 / REWARD 0 / Q_MAX 3.295140e+12
Tally is -46
TIMESTEP 15597 / EPSILON 0.9703657000000137 / ACTION 2 / REWARD 0 / Q_MAX 2.638472e+12


Tally is -47
TIMESTEP 15702 / EPSILON 0.9701662000000137 / ACTION 2 / REWARD 0 / Q_MAX 2.572884e+11
Tally is -47
TIMESTEP 15703 / EPSILON 0.9701643000000137 / ACTION 0 / REWARD 0 / Q_MAX 1.387402e+12
Tally is -47
TIMESTEP 15704 / EPSILON 0.9701624000000137 / ACTION 2 / REWARD 0 / Q_MAX 3.478131e+11
Tally is -47
TIMESTEP 15705 / EPSILON 0.9701605000000137 / ACTION 2 / REWARD 0 / Q_MAX 7.705636e+11
Tally is -47
TIMESTEP 15706 / EPSILON 0.9701586000000137 / ACTION 2 / REWARD 0 / Q_MAX 1.560656e+12
Tally is -47
TIMESTEP 15707 / EPSILON 0.9701567000000137 / ACTION 0 / REWARD 0 / Q_MAX 2.645456e+12
Tally is -47
TIMESTEP 15708 / EPSILON 0.9701548000000138 / ACTION 1 / REWARD 0 / Q_MAX 1.343088e+12
Tally is -47
TIMESTEP 15709 / EPSILON 0.9701529000000138 / ACTION 2 / REWARD 0 / Q_MAX 1.058282e+12
Tally is -47
TIMESTEP 15710 / EPSILON 0.9701510000000138 / ACTION 1 / REWARD 0 / Q_MAX 1.386704e+12
Tally is -47
TIMESTEP 15711 / EPSILON 0.9701491000000138 / ACTION 1 / REWARD 0 / Q_MAX 7.146379e+11


Tally is -47
TIMESTEP 15815 / EPSILON 0.9699515000000138 / ACTION 2 / REWARD 0 / Q_MAX 7.746208e+11
Tally is -47
TIMESTEP 15816 / EPSILON 0.9699496000000138 / ACTION 2 / REWARD 0 / Q_MAX 7.586204e+11
Tally is -47
TIMESTEP 15817 / EPSILON 0.9699477000000138 / ACTION 0 / REWARD 0 / Q_MAX 6.457698e+11
Tally is -47
TIMESTEP 15818 / EPSILON 0.9699458000000138 / ACTION 2 / REWARD 0 / Q_MAX 1.074436e+12
Tally is -47
TIMESTEP 15819 / EPSILON 0.9699439000000138 / ACTION 0 / REWARD 0 / Q_MAX 1.707413e+12
Tally is -47
TIMESTEP 15820 / EPSILON 0.9699420000000138 / ACTION 0 / REWARD 0 / Q_MAX 2.128859e+12
Tally is -47
TIMESTEP 15821 / EPSILON 0.9699401000000138 / ACTION 2 / REWARD 0 / Q_MAX 5.332683e+12
Tally is -47
TIMESTEP 15822 / EPSILON 0.9699382000000139 / ACTION 2 / REWARD 0 / Q_MAX 2.025978e+12
Tally is -47
TIMESTEP 15823 / EPSILON 0.9699363000000139 / ACTION 2 / REWARD 0 / Q_MAX 4.428922e+11
Tally is -47
TIMESTEP 15824 / EPSILON 0.9699344000000139 / ACTION 2 / REWARD 0 / Q_MAX 6.222575e+11


Tally is -48
TIMESTEP 15929 / EPSILON 0.9697349000000139 / ACTION 2 / REWARD 0 / Q_MAX 8.466309e+11
Tally is -48
TIMESTEP 15930 / EPSILON 0.969733000000014 / ACTION 1 / REWARD 0 / Q_MAX 7.839956e+11
Tally is -48
TIMESTEP 15931 / EPSILON 0.969731100000014 / ACTION 2 / REWARD 0 / Q_MAX 1.078656e+12
Tally is -48
TIMESTEP 15932 / EPSILON 0.969729200000014 / ACTION 0 / REWARD 0 / Q_MAX 9.488658e+11
Tally is -48
TIMESTEP 15933 / EPSILON 0.969727300000014 / ACTION 1 / REWARD 0 / Q_MAX 5.908869e+11
Tally is -48
TIMESTEP 15934 / EPSILON 0.969725400000014 / ACTION 2 / REWARD 0 / Q_MAX 1.134628e+12
Tally is -48
TIMESTEP 15935 / EPSILON 0.969723500000014 / ACTION 1 / REWARD 0 / Q_MAX 7.946131e+11
Tally is -48
TIMESTEP 15936 / EPSILON 0.969721600000014 / ACTION 1 / REWARD 0 / Q_MAX 6.312209e+11
Tally is -48
TIMESTEP 15937 / EPSILON 0.969719700000014 / ACTION 0 / REWARD 0 / Q_MAX 1.363390e+12
Tally is -48
TIMESTEP 15938 / EPSILON 0.969717800000014 / ACTION 0 / REWARD 0 / Q_MAX 1.235752e+12
Tally is 

Tally is -48
TIMESTEP 16039 / EPSILON 0.969525900000014 / ACTION 2 / REWARD 0 / Q_MAX -1.277241e+11
Tally is -48
TIMESTEP 16040 / EPSILON 0.969524000000014 / ACTION 2 / REWARD 0 / Q_MAX 2.611610e+08
Tally is -48
TIMESTEP 16041 / EPSILON 0.969522100000014 / ACTION 0 / REWARD 0 / Q_MAX -1.569297e+12
Tally is -48
TIMESTEP 16042 / EPSILON 0.969520200000014 / ACTION 0 / REWARD 0 / Q_MAX -3.155960e+11
Tally is -48
TIMESTEP 16043 / EPSILON 0.969518300000014 / ACTION 1 / REWARD 0 / Q_MAX -4.741205e+11
Tally is -48
TIMESTEP 16044 / EPSILON 0.969516400000014 / ACTION 1 / REWARD 0 / Q_MAX -1.218396e+12
Tally is -48
TIMESTEP 16045 / EPSILON 0.969514500000014 / ACTION 2 / REWARD 0 / Q_MAX 2.214469e+11
Tally is -48
TIMESTEP 16046 / EPSILON 0.969512600000014 / ACTION 1 / REWARD 0 / Q_MAX 1.591467e+12
Tally is -48
TIMESTEP 16047 / EPSILON 0.969510700000014 / ACTION 1 / REWARD 0 / Q_MAX 1.272479e+12
Tally is -48
TIMESTEP 16048 / EPSILON 0.969508800000014 / ACTION 2 / REWARD 0 / Q_MAX 1.736131e+12
Tally

TIMESTEP 16151 / EPSILON 0.9693131000000141 / ACTION 1 / REWARD 0 / Q_MAX 9.770979e+11
Tally is -49
TIMESTEP 16152 / EPSILON 0.9693112000000141 / ACTION 0 / REWARD 0 / Q_MAX 3.702492e+11
Tally is -49
TIMESTEP 16153 / EPSILON 0.9693093000000141 / ACTION 0 / REWARD 0 / Q_MAX 1.394774e+12
Tally is -49
TIMESTEP 16154 / EPSILON 0.9693074000000141 / ACTION 1 / REWARD 0 / Q_MAX 1.449164e+11
Tally is -49
TIMESTEP 16155 / EPSILON 0.9693055000000141 / ACTION 1 / REWARD 0 / Q_MAX -7.390774e+11
Tally is -49
TIMESTEP 16156 / EPSILON 0.9693036000000141 / ACTION 0 / REWARD 0 / Q_MAX 3.514002e+11
Tally is -49
TIMESTEP 16157 / EPSILON 0.9693017000000141 / ACTION 1 / REWARD 0 / Q_MAX 6.807662e+11
Tally is -49
TIMESTEP 16158 / EPSILON 0.9692998000000141 / ACTION 1 / REWARD 0 / Q_MAX 6.482437e+11
Tally is -49
TIMESTEP 16159 / EPSILON 0.9692979000000141 / ACTION 2 / REWARD 0 / Q_MAX 1.506402e+12
Tally is -49
TIMESTEP 16160 / EPSILON 0.9692960000000141 / ACTION 2 / REWARD 0 / Q_MAX 8.656981e+11
Tally is -49

TIMESTEP 16263 / EPSILON 0.9691003000000142 / ACTION 0 / REWARD 0 / Q_MAX 1.555838e+12
Tally is -49
TIMESTEP 16264 / EPSILON 0.9690984000000142 / ACTION 1 / REWARD 0 / Q_MAX 1.664659e+12
Tally is -49
TIMESTEP 16265 / EPSILON 0.9690965000000142 / ACTION 1 / REWARD 0 / Q_MAX 1.680799e+12
Tally is -49
TIMESTEP 16266 / EPSILON 0.9690946000000142 / ACTION 2 / REWARD 0 / Q_MAX 2.946422e+12
Tally is -49
TIMESTEP 16267 / EPSILON 0.9690927000000142 / ACTION 0 / REWARD 0 / Q_MAX 2.177462e+12
Tally is -49
TIMESTEP 16268 / EPSILON 0.9690908000000142 / ACTION 2 / REWARD 0 / Q_MAX 1.362086e+12
Tally is -49
TIMESTEP 16269 / EPSILON 0.9690889000000142 / ACTION 1 / REWARD 0 / Q_MAX 1.734335e+12
Tally is -49
TIMESTEP 16270 / EPSILON 0.9690870000000142 / ACTION 2 / REWARD 0 / Q_MAX 1.636019e+12
Tally is -49
TIMESTEP 16271 / EPSILON 0.9690851000000142 / ACTION 0 / REWARD 0 / Q_MAX 2.394126e+12
Tally is -49
TIMESTEP 16272 / EPSILON 0.9690832000000142 / ACTION 2 / REWARD 0 / Q_MAX 1.224959e+12
Tally is -49


Tally is -49
TIMESTEP 16374 / EPSILON 0.9688894000000143 / ACTION 2 / REWARD 0 / Q_MAX 3.419934e+12
Tally is -49
TIMESTEP 16375 / EPSILON 0.9688875000000143 / ACTION 1 / REWARD 0 / Q_MAX 3.036488e+12
Tally is -49
TIMESTEP 16376 / EPSILON 0.9688856000000143 / ACTION 2 / REWARD 0 / Q_MAX 3.768838e+12
Tally is -49
TIMESTEP 16377 / EPSILON 0.9688837000000143 / ACTION 1 / REWARD 0 / Q_MAX 1.609434e+12
Tally is -49
TIMESTEP 16378 / EPSILON 0.9688818000000143 / ACTION 2 / REWARD 0 / Q_MAX 1.158249e+12
Tally is -49
TIMESTEP 16379 / EPSILON 0.9688799000000143 / ACTION 1 / REWARD 0 / Q_MAX 1.456551e+12
Tally is -49
TIMESTEP 16380 / EPSILON 0.9688780000000143 / ACTION 1 / REWARD 0 / Q_MAX 1.316898e+12
Tally is -49
TIMESTEP 16381 / EPSILON 0.9688761000000143 / ACTION 0 / REWARD 0 / Q_MAX 1.226236e+12
Tally is -49
TIMESTEP 16382 / EPSILON 0.9688742000000143 / ACTION 1 / REWARD 0 / Q_MAX 5.498711e+11
Tally is -50
TIMESTEP 16383 / EPSILON 0.9688723000000143 / ACTION 1 / REWARD -1 / Q_MAX 4.344692e+11

TIMESTEP 16488 / EPSILON 0.9686728000000144 / ACTION 1 / REWARD 0 / Q_MAX 6.563848e+11
Tally is -50
TIMESTEP 16489 / EPSILON 0.9686709000000144 / ACTION 2 / REWARD 0 / Q_MAX 6.219954e+11
Tally is -50
TIMESTEP 16490 / EPSILON 0.9686690000000144 / ACTION 0 / REWARD 0 / Q_MAX 1.007196e+12
Tally is -50
TIMESTEP 16491 / EPSILON 0.9686671000000144 / ACTION 2 / REWARD 0 / Q_MAX 8.790282e+11
Tally is -50
TIMESTEP 16492 / EPSILON 0.9686652000000144 / ACTION 2 / REWARD 0 / Q_MAX 5.731473e+11
Tally is -50
TIMESTEP 16493 / EPSILON 0.9686633000000144 / ACTION 0 / REWARD 0 / Q_MAX 9.483135e+11
Tally is -50
TIMESTEP 16494 / EPSILON 0.9686614000000144 / ACTION 2 / REWARD 0 / Q_MAX 8.409635e+11
Tally is -50
TIMESTEP 16495 / EPSILON 0.9686595000000144 / ACTION 1 / REWARD 0 / Q_MAX 1.471601e+12
Tally is -50
TIMESTEP 16496 / EPSILON 0.9686576000000144 / ACTION 2 / REWARD 0 / Q_MAX 1.177219e+12
Tally is -50
TIMESTEP 16497 / EPSILON 0.9686557000000144 / ACTION 0 / REWARD 0 / Q_MAX 5.302128e+11
Tally is -50


TIMESTEP 16599 / EPSILON 0.9684619000000145 / ACTION 0 / REWARD 0 / Q_MAX 9.089439e+11
Tally is -50
TIMESTEP 16600 / EPSILON 0.9684600000000145 / ACTION 0 / REWARD 0 / Q_MAX 1.360135e+11
Tally is -50
TIMESTEP 16601 / EPSILON 0.9684581000000145 / ACTION 1 / REWARD 0 / Q_MAX 1.226681e+12
Tally is -50
TIMESTEP 16602 / EPSILON 0.9684562000000145 / ACTION 2 / REWARD 0 / Q_MAX -2.813292e+11
Tally is -50
TIMESTEP 16603 / EPSILON 0.9684543000000145 / ACTION 2 / REWARD 0 / Q_MAX 9.197103e+10
Tally is -50
TIMESTEP 16604 / EPSILON 0.9684524000000145 / ACTION 0 / REWARD 0 / Q_MAX 7.354143e+11
Tally is -50
TIMESTEP 16605 / EPSILON 0.9684505000000145 / ACTION 1 / REWARD 0 / Q_MAX 1.834317e+12
Tally is -50
TIMESTEP 16606 / EPSILON 0.9684486000000145 / ACTION 2 / REWARD 0 / Q_MAX 4.062901e+11
Tally is -50
TIMESTEP 16607 / EPSILON 0.9684467000000145 / ACTION 2 / REWARD 0 / Q_MAX 9.290623e+11
Tally is -50
TIMESTEP 16608 / EPSILON 0.9684448000000145 / ACTION 2 / REWARD 0 / Q_MAX 2.366691e+12
Tally is -50

TIMESTEP 16714 / EPSILON 0.9682434000000146 / ACTION 1 / REWARD 0 / Q_MAX 9.588322e+11
Tally is -51
TIMESTEP 16715 / EPSILON 0.9682415000000146 / ACTION 0 / REWARD 0 / Q_MAX 8.332478e+11
Tally is -51
TIMESTEP 16716 / EPSILON 0.9682396000000146 / ACTION 1 / REWARD 0 / Q_MAX 1.050707e+12
Tally is -51
TIMESTEP 16717 / EPSILON 0.9682377000000146 / ACTION 2 / REWARD 0 / Q_MAX 1.565761e+12
Tally is -51
TIMESTEP 16718 / EPSILON 0.9682358000000146 / ACTION 1 / REWARD 0 / Q_MAX 1.277202e+12
Tally is -51
TIMESTEP 16719 / EPSILON 0.9682339000000146 / ACTION 0 / REWARD 0 / Q_MAX 1.012945e+12
Tally is -51
TIMESTEP 16720 / EPSILON 0.9682320000000146 / ACTION 2 / REWARD 0 / Q_MAX 9.516781e+11
Tally is -51
TIMESTEP 16721 / EPSILON 0.9682301000000146 / ACTION 1 / REWARD 0 / Q_MAX 5.345822e+11
Tally is -51
TIMESTEP 16722 / EPSILON 0.9682282000000146 / ACTION 0 / REWARD 0 / Q_MAX 3.900247e+11
Tally is -51
TIMESTEP 16723 / EPSILON 0.9682263000000146 / ACTION 0 / REWARD 0 / Q_MAX 2.514166e+11
Tally is -51


Tally is -51
TIMESTEP 16828 / EPSILON 0.9680268000000147 / ACTION 0 / REWARD 0 / Q_MAX 2.504801e+11
Tally is -51
TIMESTEP 16829 / EPSILON 0.9680249000000147 / ACTION 2 / REWARD 0 / Q_MAX 1.353437e+12
Tally is -51
TIMESTEP 16830 / EPSILON 0.9680230000000147 / ACTION 1 / REWARD 0 / Q_MAX 2.727140e+12
Tally is -51
TIMESTEP 16831 / EPSILON 0.9680211000000147 / ACTION 0 / REWARD 0 / Q_MAX 2.294548e+12
Tally is -51
TIMESTEP 16832 / EPSILON 0.9680192000000147 / ACTION 1 / REWARD 0 / Q_MAX 1.964381e+12
Tally is -51
TIMESTEP 16833 / EPSILON 0.9680173000000147 / ACTION 2 / REWARD 0 / Q_MAX 6.647813e+11
Tally is -51
TIMESTEP 16834 / EPSILON 0.9680154000000147 / ACTION 2 / REWARD 0 / Q_MAX 1.166270e+12
Tally is -51
TIMESTEP 16835 / EPSILON 0.9680135000000147 / ACTION 0 / REWARD 0 / Q_MAX 1.489109e+12
Tally is -51
TIMESTEP 16836 / EPSILON 0.9680116000000147 / ACTION 0 / REWARD 0 / Q_MAX 4.921780e+12
Tally is -51
TIMESTEP 16837 / EPSILON 0.9680097000000147 / ACTION 1 / REWARD 0 / Q_MAX 1.653376e+12


Tally is -52
TIMESTEP 16940 / EPSILON 0.9678140000000148 / ACTION 0 / REWARD 0 / Q_MAX 2.361930e+12
Tally is -52
TIMESTEP 16941 / EPSILON 0.9678121000000148 / ACTION 1 / REWARD 0 / Q_MAX 1.421580e+12
Tally is -52
TIMESTEP 16942 / EPSILON 0.9678102000000148 / ACTION 1 / REWARD 0 / Q_MAX -2.615956e+11
Tally is -52
TIMESTEP 16943 / EPSILON 0.9678083000000148 / ACTION 0 / REWARD 0 / Q_MAX 1.137879e+12
Tally is -52
TIMESTEP 16944 / EPSILON 0.9678064000000148 / ACTION 1 / REWARD 0 / Q_MAX 1.555901e+12
Tally is -52
TIMESTEP 16945 / EPSILON 0.9678045000000148 / ACTION 0 / REWARD 0 / Q_MAX -1.267254e+11
Tally is -52
TIMESTEP 16946 / EPSILON 0.9678026000000148 / ACTION 1 / REWARD 0 / Q_MAX -7.681671e+09
Tally is -52
TIMESTEP 16947 / EPSILON 0.9678007000000148 / ACTION 2 / REWARD 0 / Q_MAX 2.428741e+11
Tally is -52
TIMESTEP 16948 / EPSILON 0.9677988000000148 / ACTION 2 / REWARD 0 / Q_MAX 1.662365e+12
Tally is -52
TIMESTEP 16949 / EPSILON 0.9677969000000148 / ACTION 2 / REWARD 0 / Q_MAX 1.382953e+

TIMESTEP 17054 / EPSILON 0.9675974000000149 / ACTION 2 / REWARD 0 / Q_MAX 4.413241e+12
Tally is -52
TIMESTEP 17055 / EPSILON 0.9675955000000149 / ACTION 2 / REWARD 0 / Q_MAX 4.416932e+12
Tally is -52
TIMESTEP 17056 / EPSILON 0.9675936000000149 / ACTION 2 / REWARD 0 / Q_MAX 3.951909e+12
Tally is -52
TIMESTEP 17057 / EPSILON 0.9675917000000149 / ACTION 0 / REWARD 0 / Q_MAX 4.353182e+12
Tally is -52
TIMESTEP 17058 / EPSILON 0.9675898000000149 / ACTION 2 / REWARD 0 / Q_MAX 3.682368e+12
Tally is -52
TIMESTEP 17059 / EPSILON 0.9675879000000149 / ACTION 1 / REWARD 0 / Q_MAX 1.625906e+12
Tally is -52
TIMESTEP 17060 / EPSILON 0.9675860000000149 / ACTION 2 / REWARD 0 / Q_MAX 2.463645e+12
Tally is -52
TIMESTEP 17061 / EPSILON 0.9675841000000149 / ACTION 2 / REWARD 0 / Q_MAX 2.712300e+12
Tally is -52
TIMESTEP 17062 / EPSILON 0.9675822000000149 / ACTION 1 / REWARD 0 / Q_MAX 3.614292e+12
Tally is -52
TIMESTEP 17063 / EPSILON 0.9675803000000149 / ACTION 1 / REWARD 0 / Q_MAX 3.127233e+12
Tally is -52


TIMESTEP 17164 / EPSILON 0.967388400000015 / ACTION 1 / REWARD 0 / Q_MAX 2.065725e+12
Tally is -52
TIMESTEP 17165 / EPSILON 0.967386500000015 / ACTION 1 / REWARD 0 / Q_MAX 3.258333e+12
Tally is -52
TIMESTEP 17166 / EPSILON 0.967384600000015 / ACTION 0 / REWARD 0 / Q_MAX 3.760370e+12
Tally is -52
TIMESTEP 17167 / EPSILON 0.967382700000015 / ACTION 1 / REWARD 0 / Q_MAX 5.450514e+12
Tally is -52
TIMESTEP 17168 / EPSILON 0.967380800000015 / ACTION 0 / REWARD 0 / Q_MAX 3.560982e+12
Tally is -52
TIMESTEP 17169 / EPSILON 0.967378900000015 / ACTION 0 / REWARD 0 / Q_MAX 3.755657e+12
Tally is -52
TIMESTEP 17170 / EPSILON 0.967377000000015 / ACTION 0 / REWARD 0 / Q_MAX 4.277221e+12
Tally is -52
TIMESTEP 17171 / EPSILON 0.967375100000015 / ACTION 1 / REWARD 0 / Q_MAX 4.422977e+12
Tally is -52
TIMESTEP 17172 / EPSILON 0.967373200000015 / ACTION 0 / REWARD 0 / Q_MAX 2.315786e+12
Tally is -52
TIMESTEP 17173 / EPSILON 0.967371300000015 / ACTION 2 / REWARD 0 / Q_MAX 4.882566e+12
Tally is -52
TIMESTEP 1

TIMESTEP 17280 / EPSILON 0.9671680000000151 / ACTION 1 / REWARD 0 / Q_MAX 9.771729e+11
Tally is -52
TIMESTEP 17281 / EPSILON 0.9671661000000151 / ACTION 2 / REWARD 0 / Q_MAX 7.060752e+11
Tally is -52
TIMESTEP 17282 / EPSILON 0.9671642000000151 / ACTION 2 / REWARD 0 / Q_MAX 1.053422e+12
Tally is -52
TIMESTEP 17283 / EPSILON 0.9671623000000151 / ACTION 1 / REWARD 0 / Q_MAX 5.847463e+10
Tally is -52
TIMESTEP 17284 / EPSILON 0.9671604000000151 / ACTION 0 / REWARD 0 / Q_MAX 6.771418e+11
Tally is -52
TIMESTEP 17285 / EPSILON 0.9671585000000151 / ACTION 0 / REWARD 0 / Q_MAX 9.256318e+11
Tally is -52
TIMESTEP 17286 / EPSILON 0.9671566000000151 / ACTION 0 / REWARD 0 / Q_MAX 1.275130e+12
Tally is -52
TIMESTEP 17287 / EPSILON 0.9671547000000151 / ACTION 2 / REWARD 0 / Q_MAX 1.254515e+12
Tally is -52
TIMESTEP 17288 / EPSILON 0.9671528000000151 / ACTION 1 / REWARD 0 / Q_MAX 1.128394e+12
Tally is -52
TIMESTEP 17289 / EPSILON 0.9671509000000151 / ACTION 0 / REWARD 0 / Q_MAX 1.226577e+12
Tally is -52


TIMESTEP 17394 / EPSILON 0.9669514000000152 / ACTION 0 / REWARD 0 / Q_MAX 1.681809e+12
Tally is -53
TIMESTEP 17395 / EPSILON 0.9669495000000152 / ACTION 0 / REWARD 0 / Q_MAX 6.241554e+11
Tally is -53
TIMESTEP 17396 / EPSILON 0.9669476000000152 / ACTION 1 / REWARD 0 / Q_MAX -7.169847e+11
Tally is -53
TIMESTEP 17397 / EPSILON 0.9669457000000152 / ACTION 2 / REWARD 0 / Q_MAX 1.223847e+12
Tally is -53
TIMESTEP 17398 / EPSILON 0.9669438000000152 / ACTION 2 / REWARD 0 / Q_MAX 1.082873e+12
Tally is -53
TIMESTEP 17399 / EPSILON 0.9669419000000152 / ACTION 0 / REWARD 0 / Q_MAX 1.420404e+12
Tally is -53
TIMESTEP 17400 / EPSILON 0.9669400000000152 / ACTION 1 / REWARD 0 / Q_MAX 1.712615e+12
Tally is -53
TIMESTEP 17401 / EPSILON 0.9669381000000152 / ACTION 2 / REWARD 0 / Q_MAX 1.910596e+12
Tally is -53
TIMESTEP 17402 / EPSILON 0.9669362000000152 / ACTION 0 / REWARD 0 / Q_MAX 7.588552e+11
Tally is -53
TIMESTEP 17403 / EPSILON 0.9669343000000152 / ACTION 1 / REWARD 0 / Q_MAX 1.755509e+12
Tally is -53

Tally is -53
TIMESTEP 17508 / EPSILON 0.9667348000000153 / ACTION 2 / REWARD 0 / Q_MAX 3.724225e+11
Tally is -53
TIMESTEP 17509 / EPSILON 0.9667329000000153 / ACTION 0 / REWARD 0 / Q_MAX 1.477666e+12
Tally is -53
TIMESTEP 17510 / EPSILON 0.9667310000000153 / ACTION 2 / REWARD 0 / Q_MAX 1.106842e+12
Tally is -53
TIMESTEP 17511 / EPSILON 0.9667291000000153 / ACTION 0 / REWARD 0 / Q_MAX 4.292333e+11
Tally is -53
TIMESTEP 17512 / EPSILON 0.9667272000000153 / ACTION 1 / REWARD 0 / Q_MAX 6.243500e+11
Tally is -53
TIMESTEP 17513 / EPSILON 0.9667253000000153 / ACTION 0 / REWARD 0 / Q_MAX 9.991179e+11
Tally is -53
TIMESTEP 17514 / EPSILON 0.9667234000000153 / ACTION 1 / REWARD 0 / Q_MAX 9.214292e+11
Tally is -53
TIMESTEP 17515 / EPSILON 0.9667215000000153 / ACTION 1 / REWARD 0 / Q_MAX 1.232545e+12
Tally is -53
TIMESTEP 17516 / EPSILON 0.9667196000000153 / ACTION 0 / REWARD 0 / Q_MAX 1.379466e+12
Tally is -53
TIMESTEP 17517 / EPSILON 0.9667177000000153 / ACTION 0 / REWARD 0 / Q_MAX 1.468864e+12


TIMESTEP 17621 / EPSILON 0.9665201000000154 / ACTION 2 / REWARD 0 / Q_MAX 1.639240e+12
Tally is -53
TIMESTEP 17622 / EPSILON 0.9665182000000154 / ACTION 2 / REWARD 0 / Q_MAX 1.562329e+12
Tally is -53
TIMESTEP 17623 / EPSILON 0.9665163000000154 / ACTION 2 / REWARD 0 / Q_MAX 6.746695e+11
Tally is -53
TIMESTEP 17624 / EPSILON 0.9665144000000154 / ACTION 0 / REWARD 0 / Q_MAX 8.210705e+11
Tally is -53
TIMESTEP 17625 / EPSILON 0.9665125000000154 / ACTION 2 / REWARD 0 / Q_MAX 1.600330e+12
Tally is -53
TIMESTEP 17626 / EPSILON 0.9665106000000154 / ACTION 1 / REWARD 0 / Q_MAX 2.465994e+12
Tally is -53
TIMESTEP 17627 / EPSILON 0.9665087000000154 / ACTION 1 / REWARD 0 / Q_MAX 1.313939e+12
Tally is -53
TIMESTEP 17628 / EPSILON 0.9665068000000154 / ACTION 2 / REWARD 0 / Q_MAX 2.394618e+12
Tally is -53
TIMESTEP 17629 / EPSILON 0.9665049000000154 / ACTION 1 / REWARD 0 / Q_MAX 2.222744e+12
Tally is -53
TIMESTEP 17630 / EPSILON 0.9665030000000154 / ACTION 1 / REWARD 0 / Q_MAX 1.727111e+12
Tally is -53


Tally is -54
TIMESTEP 17731 / EPSILON 0.9663111000000155 / ACTION 1 / REWARD 0 / Q_MAX -3.953390e+11
Tally is -54
TIMESTEP 17732 / EPSILON 0.9663092000000155 / ACTION 1 / REWARD 0 / Q_MAX -5.696677e+10
Tally is -54
TIMESTEP 17733 / EPSILON 0.9663073000000155 / ACTION 2 / REWARD 0 / Q_MAX 3.802580e+11
Tally is -54
TIMESTEP 17734 / EPSILON 0.9663054000000155 / ACTION 0 / REWARD 0 / Q_MAX 8.433994e+11
Tally is -54
TIMESTEP 17735 / EPSILON 0.9663035000000155 / ACTION 1 / REWARD 0 / Q_MAX 3.637764e+12
Tally is -54
TIMESTEP 17736 / EPSILON 0.9663016000000155 / ACTION 0 / REWARD 0 / Q_MAX 2.994123e+12
Tally is -54
TIMESTEP 17737 / EPSILON 0.9662997000000155 / ACTION 0 / REWARD 0 / Q_MAX 3.215825e+12
Tally is -54
TIMESTEP 17738 / EPSILON 0.9662978000000155 / ACTION 1 / REWARD 0 / Q_MAX 1.483703e+12
Tally is -54
TIMESTEP 17739 / EPSILON 0.9662959000000155 / ACTION 0 / REWARD 0 / Q_MAX 2.575329e+12
Tally is -54
TIMESTEP 17740 / EPSILON 0.9662940000000155 / ACTION 2 / REWARD 0 / Q_MAX 1.920548e+1

Tally is -54
TIMESTEP 17844 / EPSILON 0.9660964000000156 / ACTION 2 / REWARD 0 / Q_MAX 9.211127e+11
Tally is -54
TIMESTEP 17845 / EPSILON 0.9660945000000156 / ACTION 2 / REWARD 0 / Q_MAX 7.780562e+11
Tally is -54
TIMESTEP 17846 / EPSILON 0.9660926000000156 / ACTION 1 / REWARD 0 / Q_MAX 1.426898e+12
Tally is -54
TIMESTEP 17847 / EPSILON 0.9660907000000156 / ACTION 0 / REWARD 0 / Q_MAX 1.181537e+12
Tally is -54
TIMESTEP 17848 / EPSILON 0.9660888000000156 / ACTION 0 / REWARD 0 / Q_MAX 1.928562e+12
Tally is -54
TIMESTEP 17849 / EPSILON 0.9660869000000156 / ACTION 0 / REWARD 0 / Q_MAX 1.219950e+12
Tally is -54
TIMESTEP 17850 / EPSILON 0.9660850000000156 / ACTION 1 / REWARD 0 / Q_MAX 9.282573e+11
Tally is -54
TIMESTEP 17851 / EPSILON 0.9660831000000156 / ACTION 1 / REWARD 0 / Q_MAX 1.383663e+12
Tally is -54
TIMESTEP 17852 / EPSILON 0.9660812000000156 / ACTION 0 / REWARD 0 / Q_MAX 1.014365e+12
Tally is -54
TIMESTEP 17853 / EPSILON 0.9660793000000156 / ACTION 0 / REWARD 0 / Q_MAX -4.880315e+11

TIMESTEP 17957 / EPSILON 0.9658817000000157 / ACTION 0 / REWARD 0 / Q_MAX 8.411922e+11
Tally is -54
TIMESTEP 17958 / EPSILON 0.9658798000000157 / ACTION 0 / REWARD 0 / Q_MAX 1.990647e+12
Tally is -54
TIMESTEP 17959 / EPSILON 0.9658779000000157 / ACTION 0 / REWARD 0 / Q_MAX 2.979527e+12
Tally is -54
TIMESTEP 17960 / EPSILON 0.9658760000000157 / ACTION 0 / REWARD 0 / Q_MAX 1.359267e+12
Tally is -54
TIMESTEP 17961 / EPSILON 0.9658741000000157 / ACTION 0 / REWARD 0 / Q_MAX 4.272326e+12
Tally is -54
TIMESTEP 17962 / EPSILON 0.9658722000000157 / ACTION 1 / REWARD 0 / Q_MAX 3.733695e+12
Tally is -54
TIMESTEP 17963 / EPSILON 0.9658703000000157 / ACTION 2 / REWARD 0 / Q_MAX 3.314671e+12
Tally is -54
TIMESTEP 17964 / EPSILON 0.9658684000000157 / ACTION 0 / REWARD 0 / Q_MAX 2.579774e+12
Tally is -54
TIMESTEP 17965 / EPSILON 0.9658665000000157 / ACTION 2 / REWARD 0 / Q_MAX 1.102295e+12
Tally is -54
TIMESTEP 17966 / EPSILON 0.9658646000000157 / ACTION 0 / REWARD 0 / Q_MAX 1.574038e+12
Tally is -54


Tally is -54
TIMESTEP 18071 / EPSILON 0.9656651000000158 / ACTION 1 / REWARD 0 / Q_MAX 6.915287e+11
Tally is -54
TIMESTEP 18072 / EPSILON 0.9656632000000158 / ACTION 2 / REWARD 0 / Q_MAX 1.016474e+12
Tally is -54
TIMESTEP 18073 / EPSILON 0.9656613000000158 / ACTION 1 / REWARD 0 / Q_MAX 2.754511e+11
Tally is -54
TIMESTEP 18074 / EPSILON 0.9656594000000158 / ACTION 1 / REWARD 0 / Q_MAX 6.507195e+11
Tally is -54
TIMESTEP 18075 / EPSILON 0.9656575000000158 / ACTION 2 / REWARD 0 / Q_MAX 5.399836e+11
Tally is -54
TIMESTEP 18076 / EPSILON 0.9656556000000158 / ACTION 2 / REWARD 0 / Q_MAX 4.360721e+11
Tally is -54
TIMESTEP 18077 / EPSILON 0.9656537000000158 / ACTION 2 / REWARD 0 / Q_MAX 1.278058e+11
Tally is -54
TIMESTEP 18078 / EPSILON 0.9656518000000158 / ACTION 2 / REWARD 0 / Q_MAX 1.194669e+12
Tally is -54
TIMESTEP 18079 / EPSILON 0.9656499000000158 / ACTION 0 / REWARD 0 / Q_MAX 6.629801e+11
Tally is -54
TIMESTEP 18080 / EPSILON 0.9656480000000158 / ACTION 2 / REWARD 0 / Q_MAX 5.286490e+11


Tally is -55
TIMESTEP 18184 / EPSILON 0.9654504000000159 / ACTION 0 / REWARD 0 / Q_MAX 5.382578e+12
Tally is -55
TIMESTEP 18185 / EPSILON 0.9654485000000159 / ACTION 1 / REWARD 0 / Q_MAX 4.600589e+12
Tally is -55
TIMESTEP 18186 / EPSILON 0.9654466000000159 / ACTION 0 / REWARD 0 / Q_MAX 6.949627e+12
Tally is -55
TIMESTEP 18187 / EPSILON 0.9654447000000159 / ACTION 2 / REWARD 0 / Q_MAX 4.986288e+12
Tally is -55
TIMESTEP 18188 / EPSILON 0.9654428000000159 / ACTION 1 / REWARD 0 / Q_MAX 5.987329e+12
Tally is -55
TIMESTEP 18189 / EPSILON 0.9654409000000159 / ACTION 2 / REWARD 0 / Q_MAX 2.319894e+12
Tally is -55
TIMESTEP 18190 / EPSILON 0.9654390000000159 / ACTION 1 / REWARD 0 / Q_MAX 1.806039e+12
Tally is -55
TIMESTEP 18191 / EPSILON 0.9654371000000159 / ACTION 1 / REWARD 0 / Q_MAX 3.156843e+12
Tally is -55
TIMESTEP 18192 / EPSILON 0.9654352000000159 / ACTION 0 / REWARD 0 / Q_MAX 3.410777e+12
Tally is -55
TIMESTEP 18193 / EPSILON 0.9654333000000159 / ACTION 0 / REWARD 0 / Q_MAX 2.723471e+12


Tally is -55
TIMESTEP 18294 / EPSILON 0.965241400000016 / ACTION 0 / REWARD 0 / Q_MAX 2.631251e+12
Tally is -55
TIMESTEP 18295 / EPSILON 0.965239500000016 / ACTION 1 / REWARD 0 / Q_MAX 2.069491e+12
Tally is -55
TIMESTEP 18296 / EPSILON 0.965237600000016 / ACTION 2 / REWARD 0 / Q_MAX 2.536368e+12
Tally is -55
TIMESTEP 18297 / EPSILON 0.965235700000016 / ACTION 1 / REWARD 0 / Q_MAX 2.736552e+12
Tally is -55
TIMESTEP 18298 / EPSILON 0.965233800000016 / ACTION 1 / REWARD 0 / Q_MAX 2.639513e+12
Tally is -55
TIMESTEP 18299 / EPSILON 0.965231900000016 / ACTION 1 / REWARD 0 / Q_MAX 1.792367e+12
Tally is -55
TIMESTEP 18300 / EPSILON 0.965230000000016 / ACTION 2 / REWARD 0 / Q_MAX 2.413519e+12
Tally is -55
TIMESTEP 18301 / EPSILON 0.965228100000016 / ACTION 1 / REWARD 0 / Q_MAX 2.845893e+12
Tally is -55
TIMESTEP 18302 / EPSILON 0.965226200000016 / ACTION 1 / REWARD 0 / Q_MAX 1.989879e+12
Tally is -55
TIMESTEP 18303 / EPSILON 0.965224300000016 / ACTION 1 / REWARD 0 / Q_MAX 7.813246e+11
Tally is -

TIMESTEP 18406 / EPSILON 0.9650286000000161 / ACTION 2 / REWARD 0 / Q_MAX 1.078146e+12
Tally is -56
TIMESTEP 18407 / EPSILON 0.9650267000000161 / ACTION 2 / REWARD 0 / Q_MAX 1.968936e+12
Tally is -56
TIMESTEP 18408 / EPSILON 0.9650248000000161 / ACTION 2 / REWARD 0 / Q_MAX 2.193630e+12
Tally is -56
TIMESTEP 18409 / EPSILON 0.9650229000000161 / ACTION 0 / REWARD 0 / Q_MAX 2.129973e+12
Tally is -56
TIMESTEP 18410 / EPSILON 0.9650210000000161 / ACTION 0 / REWARD 0 / Q_MAX 1.872992e+12
Tally is -56
TIMESTEP 18411 / EPSILON 0.9650191000000161 / ACTION 0 / REWARD 0 / Q_MAX 1.362526e+12
Tally is -56
TIMESTEP 18412 / EPSILON 0.9650172000000161 / ACTION 0 / REWARD 0 / Q_MAX -1.482217e+12
Tally is -56
TIMESTEP 18413 / EPSILON 0.9650153000000161 / ACTION 2 / REWARD 0 / Q_MAX -1.995787e+12
Tally is -56
TIMESTEP 18414 / EPSILON 0.9650134000000161 / ACTION 1 / REWARD 0 / Q_MAX 4.320667e+11
Tally is -56
TIMESTEP 18415 / EPSILON 0.9650115000000161 / ACTION 2 / REWARD 0 / Q_MAX 1.497931e+11
Tally is -5

Tally is -56
TIMESTEP 18518 / EPSILON 0.9648158000000162 / ACTION 2 / REWARD 0 / Q_MAX 9.159678e+11
Tally is -56
TIMESTEP 18519 / EPSILON 0.9648139000000162 / ACTION 0 / REWARD 0 / Q_MAX 1.164173e+12
Tally is -56
TIMESTEP 18520 / EPSILON 0.9648120000000162 / ACTION 0 / REWARD 0 / Q_MAX 1.057713e+12
Tally is -56
TIMESTEP 18521 / EPSILON 0.9648101000000162 / ACTION 2 / REWARD 0 / Q_MAX 1.654226e+12
Tally is -56
TIMESTEP 18522 / EPSILON 0.9648082000000162 / ACTION 1 / REWARD 0 / Q_MAX 1.332729e+12
Tally is -56
TIMESTEP 18523 / EPSILON 0.9648063000000162 / ACTION 2 / REWARD 0 / Q_MAX 1.958765e+12
Tally is -56
TIMESTEP 18524 / EPSILON 0.9648044000000162 / ACTION 0 / REWARD 0 / Q_MAX 9.771741e+11
Tally is -56
TIMESTEP 18525 / EPSILON 0.9648025000000162 / ACTION 2 / REWARD 0 / Q_MAX 1.649322e+12
Tally is -56
TIMESTEP 18526 / EPSILON 0.9648006000000162 / ACTION 0 / REWARD 0 / Q_MAX 1.759799e+12
Tally is -56
TIMESTEP 18527 / EPSILON 0.9647987000000162 / ACTION 2 / REWARD 0 / Q_MAX 1.539985e+12


TIMESTEP 18628 / EPSILON 0.9646068000000163 / ACTION 2 / REWARD 0 / Q_MAX 3.256867e+12
Tally is -56
TIMESTEP 18629 / EPSILON 0.9646049000000163 / ACTION 0 / REWARD 0 / Q_MAX 2.623102e+12
Tally is -56
TIMESTEP 18630 / EPSILON 0.9646030000000163 / ACTION 2 / REWARD 0 / Q_MAX 2.693067e+12
Tally is -56
TIMESTEP 18631 / EPSILON 0.9646011000000163 / ACTION 0 / REWARD 0 / Q_MAX 2.432591e+12
Tally is -56
TIMESTEP 18632 / EPSILON 0.9645992000000163 / ACTION 1 / REWARD 0 / Q_MAX 1.873278e+12
Tally is -56
TIMESTEP 18633 / EPSILON 0.9645973000000163 / ACTION 2 / REWARD 0 / Q_MAX 2.065015e+12
Tally is -56
TIMESTEP 18634 / EPSILON 0.9645954000000163 / ACTION 2 / REWARD 0 / Q_MAX 1.865300e+12
Tally is -56
TIMESTEP 18635 / EPSILON 0.9645935000000163 / ACTION 1 / REWARD 0 / Q_MAX 2.140814e+12
Tally is -56
TIMESTEP 18636 / EPSILON 0.9645916000000163 / ACTION 0 / REWARD 0 / Q_MAX 1.968292e+12
Tally is -56
TIMESTEP 18637 / EPSILON 0.9645897000000163 / ACTION 1 / REWARD 0 / Q_MAX 1.705349e+12
Tally is -56


Tally is -56
TIMESTEP 18742 / EPSILON 0.9643902000000164 / ACTION 0 / REWARD 0 / Q_MAX 1.936502e+12
Tally is -56
TIMESTEP 18743 / EPSILON 0.9643883000000164 / ACTION 2 / REWARD 0 / Q_MAX 1.628036e+12
Tally is -56
TIMESTEP 18744 / EPSILON 0.9643864000000164 / ACTION 2 / REWARD 0 / Q_MAX 2.397344e+12
Tally is -56
TIMESTEP 18745 / EPSILON 0.9643845000000164 / ACTION 0 / REWARD 0 / Q_MAX 2.646535e+12
Tally is -56
TIMESTEP 18746 / EPSILON 0.9643826000000164 / ACTION 2 / REWARD 0 / Q_MAX 2.004394e+12
Tally is -56
TIMESTEP 18747 / EPSILON 0.9643807000000164 / ACTION 1 / REWARD 0 / Q_MAX 1.594657e+12
Tally is -56
TIMESTEP 18748 / EPSILON 0.9643788000000164 / ACTION 2 / REWARD 0 / Q_MAX 1.612936e+12
Tally is -56
TIMESTEP 18749 / EPSILON 0.9643769000000164 / ACTION 0 / REWARD 0 / Q_MAX 1.251244e+12
Tally is -56
TIMESTEP 18750 / EPSILON 0.9643750000000164 / ACTION 2 / REWARD 0 / Q_MAX 8.981477e+11
Tally is -56
TIMESTEP 18751 / EPSILON 0.9643731000000164 / ACTION 0 / REWARD 0 / Q_MAX 2.089661e+12


Tally is -56
TIMESTEP 18848 / EPSILON 0.9641888000000165 / ACTION 2 / REWARD 0 / Q_MAX 1.121756e+12
Tally is -56
TIMESTEP 18849 / EPSILON 0.9641869000000165 / ACTION 0 / REWARD 0 / Q_MAX 6.085061e+11
Tally is -56
TIMESTEP 18850 / EPSILON 0.9641850000000165 / ACTION 2 / REWARD 0 / Q_MAX 1.314549e+12
Tally is -56
TIMESTEP 18851 / EPSILON 0.9641831000000165 / ACTION 0 / REWARD 0 / Q_MAX 1.664458e+12
Tally is -56
TIMESTEP 18852 / EPSILON 0.9641812000000165 / ACTION 2 / REWARD 0 / Q_MAX 1.815562e+12
Tally is -56
TIMESTEP 18853 / EPSILON 0.9641793000000165 / ACTION 1 / REWARD 0 / Q_MAX 1.017714e+12
Tally is -56
TIMESTEP 18854 / EPSILON 0.9641774000000165 / ACTION 1 / REWARD 0 / Q_MAX 4.564029e+11
Tally is -56
TIMESTEP 18855 / EPSILON 0.9641755000000165 / ACTION 1 / REWARD 0 / Q_MAX 5.833473e+11
Tally is -56
TIMESTEP 18856 / EPSILON 0.9641736000000165 / ACTION 1 / REWARD 0 / Q_MAX 7.678549e+11
Tally is -56
TIMESTEP 18857 / EPSILON 0.9641717000000165 / ACTION 0 / REWARD 0 / Q_MAX 4.605763e+11


TIMESTEP 18959 / EPSILON 0.9639779000000166 / ACTION 2 / REWARD 0 / Q_MAX 2.071181e+12
Tally is -57
TIMESTEP 18960 / EPSILON 0.9639760000000166 / ACTION 1 / REWARD 0 / Q_MAX 2.151830e+12
Tally is -57
TIMESTEP 18961 / EPSILON 0.9639741000000166 / ACTION 0 / REWARD 0 / Q_MAX 1.475303e+12
Tally is -57
TIMESTEP 18962 / EPSILON 0.9639722000000166 / ACTION 1 / REWARD 0 / Q_MAX 2.064569e+12
Tally is -57
TIMESTEP 18963 / EPSILON 0.9639703000000166 / ACTION 2 / REWARD 0 / Q_MAX 2.078252e+12
Tally is -57
TIMESTEP 18964 / EPSILON 0.9639684000000166 / ACTION 2 / REWARD 0 / Q_MAX 2.575632e+12
Tally is -57
TIMESTEP 18965 / EPSILON 0.9639665000000166 / ACTION 2 / REWARD 0 / Q_MAX 1.218212e+12
Tally is -57
TIMESTEP 18966 / EPSILON 0.9639646000000166 / ACTION 2 / REWARD 0 / Q_MAX 2.004386e+12
Tally is -57
TIMESTEP 18967 / EPSILON 0.9639627000000166 / ACTION 1 / REWARD 0 / Q_MAX 2.268458e+12
Tally is -57
TIMESTEP 18968 / EPSILON 0.9639608000000166 / ACTION 2 / REWARD 0 / Q_MAX 2.070745e+12
Tally is -57


Tally is -57
TIMESTEP 19065 / EPSILON 0.9637765000000167 / ACTION 0 / REWARD 0 / Q_MAX 4.197908e+11
Tally is -57
TIMESTEP 19066 / EPSILON 0.9637746000000167 / ACTION 0 / REWARD 0 / Q_MAX 3.029758e+11
Tally is -57
TIMESTEP 19067 / EPSILON 0.9637727000000167 / ACTION 1 / REWARD 0 / Q_MAX 9.617966e+11
Tally is -57
TIMESTEP 19068 / EPSILON 0.9637708000000167 / ACTION 1 / REWARD 0 / Q_MAX 1.236901e+12
Tally is -57
TIMESTEP 19069 / EPSILON 0.9637689000000167 / ACTION 1 / REWARD 0 / Q_MAX 2.052275e+12
Tally is -57
TIMESTEP 19070 / EPSILON 0.9637670000000167 / ACTION 0 / REWARD 0 / Q_MAX 2.772057e+12
Tally is -57
TIMESTEP 19071 / EPSILON 0.9637651000000167 / ACTION 0 / REWARD 0 / Q_MAX 1.945373e+12
Tally is -57
TIMESTEP 19072 / EPSILON 0.9637632000000167 / ACTION 0 / REWARD 0 / Q_MAX 2.387297e+12
Tally is -57
TIMESTEP 19073 / EPSILON 0.9637613000000167 / ACTION 0 / REWARD 0 / Q_MAX 1.330305e+12
Tally is -57
TIMESTEP 19074 / EPSILON 0.9637594000000167 / ACTION 0 / REWARD 0 / Q_MAX 8.979947e+11


Tally is -58
TIMESTEP 19172 / EPSILON 0.9635732000000168 / ACTION 1 / REWARD 0 / Q_MAX 1.523301e+12
Tally is -58
TIMESTEP 19173 / EPSILON 0.9635713000000168 / ACTION 2 / REWARD 0 / Q_MAX 9.708289e+11
Tally is -58
TIMESTEP 19174 / EPSILON 0.9635694000000168 / ACTION 0 / REWARD 0 / Q_MAX 1.393407e+12
Tally is -58
TIMESTEP 19175 / EPSILON 0.9635675000000168 / ACTION 2 / REWARD 0 / Q_MAX 3.419236e+12
Tally is -58
TIMESTEP 19176 / EPSILON 0.9635656000000168 / ACTION 1 / REWARD 0 / Q_MAX 3.534683e+12
Tally is -58
TIMESTEP 19177 / EPSILON 0.9635637000000168 / ACTION 0 / REWARD 0 / Q_MAX 2.395246e+12
Tally is -58
TIMESTEP 19178 / EPSILON 0.9635618000000168 / ACTION 1 / REWARD 0 / Q_MAX 1.806006e+12
Tally is -58
TIMESTEP 19179 / EPSILON 0.9635599000000168 / ACTION 0 / REWARD 0 / Q_MAX 2.274679e+12
Tally is -58
TIMESTEP 19180 / EPSILON 0.9635580000000168 / ACTION 2 / REWARD 0 / Q_MAX 2.819457e+12
Tally is -58
TIMESTEP 19181 / EPSILON 0.9635561000000168 / ACTION 0 / REWARD 0 / Q_MAX 3.229657e+12


Tally is -58
TIMESTEP 19281 / EPSILON 0.9633661000000169 / ACTION 1 / REWARD 0 / Q_MAX 1.977795e+11
Tally is -58
TIMESTEP 19282 / EPSILON 0.9633642000000169 / ACTION 2 / REWARD 0 / Q_MAX -1.668054e+11
Tally is -58
TIMESTEP 19283 / EPSILON 0.9633623000000169 / ACTION 2 / REWARD 0 / Q_MAX 4.940889e+11
Tally is -58
TIMESTEP 19284 / EPSILON 0.9633604000000169 / ACTION 0 / REWARD 0 / Q_MAX 4.455021e+11
Tally is -58
TIMESTEP 19285 / EPSILON 0.9633585000000169 / ACTION 0 / REWARD 0 / Q_MAX -9.381695e+10
Tally is -58
TIMESTEP 19286 / EPSILON 0.9633566000000169 / ACTION 0 / REWARD 0 / Q_MAX -1.298329e+11
Tally is -58
TIMESTEP 19287 / EPSILON 0.9633547000000169 / ACTION 1 / REWARD 0 / Q_MAX -9.401034e+11
Tally is -58
TIMESTEP 19288 / EPSILON 0.9633528000000169 / ACTION 2 / REWARD 0 / Q_MAX -2.932449e+11
Tally is -58
TIMESTEP 19289 / EPSILON 0.9633509000000169 / ACTION 0 / REWARD 0 / Q_MAX -1.404401e+11
Tally is -58
TIMESTEP 19290 / EPSILON 0.9633490000000169 / ACTION 1 / REWARD 0 / Q_MAX 8.22789

Tally is -58
TIMESTEP 19385 / EPSILON 0.963168500000017 / ACTION 0 / REWARD 0 / Q_MAX 2.377762e+12
Tally is -58
TIMESTEP 19386 / EPSILON 0.963166600000017 / ACTION 2 / REWARD 0 / Q_MAX 2.661395e+12
Tally is -58
TIMESTEP 19387 / EPSILON 0.963164700000017 / ACTION 2 / REWARD 0 / Q_MAX 1.370906e+12
Tally is -58
TIMESTEP 19388 / EPSILON 0.963162800000017 / ACTION 1 / REWARD 0 / Q_MAX 1.528343e+12
Tally is -58
TIMESTEP 19389 / EPSILON 0.963160900000017 / ACTION 1 / REWARD 0 / Q_MAX 1.355309e+12
Tally is -58
TIMESTEP 19390 / EPSILON 0.963159000000017 / ACTION 2 / REWARD 0 / Q_MAX 2.241677e+12
Tally is -58
TIMESTEP 19391 / EPSILON 0.963157100000017 / ACTION 2 / REWARD 0 / Q_MAX 2.290386e+12
Tally is -58
TIMESTEP 19392 / EPSILON 0.963155200000017 / ACTION 0 / REWARD 0 / Q_MAX 2.571889e+12
Tally is -58
TIMESTEP 19393 / EPSILON 0.963153300000017 / ACTION 1 / REWARD 0 / Q_MAX 2.557407e+12
Tally is -58
TIMESTEP 19394 / EPSILON 0.963151400000017 / ACTION 0 / REWARD 0 / Q_MAX 1.914616e+12
Tally is -

TIMESTEP 19497 / EPSILON 0.9629557000000171 / ACTION 2 / REWARD 0 / Q_MAX 1.670118e+12
Tally is -59
TIMESTEP 19498 / EPSILON 0.9629538000000171 / ACTION 1 / REWARD 0 / Q_MAX 2.037387e+12
Tally is -59
TIMESTEP 19499 / EPSILON 0.9629519000000171 / ACTION 1 / REWARD 0 / Q_MAX 2.396805e+12
Tally is -59
TIMESTEP 19500 / EPSILON 0.9629500000000171 / ACTION 1 / REWARD 0 / Q_MAX 2.594296e+12
Tally is -59
TIMESTEP 19501 / EPSILON 0.9629481000000171 / ACTION 0 / REWARD 0 / Q_MAX 1.954195e+12
Tally is -59
TIMESTEP 19502 / EPSILON 0.9629462000000171 / ACTION 1 / REWARD 0 / Q_MAX 1.649913e+12
Tally is -59
TIMESTEP 19503 / EPSILON 0.9629443000000171 / ACTION 0 / REWARD 0 / Q_MAX 1.488878e+12
Tally is -59
TIMESTEP 19504 / EPSILON 0.9629424000000171 / ACTION 2 / REWARD 0 / Q_MAX 1.781039e+12
Tally is -59
TIMESTEP 19505 / EPSILON 0.9629405000000171 / ACTION 2 / REWARD 0 / Q_MAX 2.018715e+12
Tally is -59
TIMESTEP 19506 / EPSILON 0.9629386000000171 / ACTION 0 / REWARD 0 / Q_MAX 1.263640e+12
Tally is -59


TIMESTEP 19611 / EPSILON 0.9627391000000172 / ACTION 0 / REWARD 0 / Q_MAX 1.957377e+12
Tally is -59
TIMESTEP 19612 / EPSILON 0.9627372000000172 / ACTION 0 / REWARD 0 / Q_MAX 1.516138e+12
Tally is -59
TIMESTEP 19613 / EPSILON 0.9627353000000172 / ACTION 0 / REWARD 0 / Q_MAX 1.263158e+12
Tally is -59
TIMESTEP 19614 / EPSILON 0.9627334000000172 / ACTION 2 / REWARD 0 / Q_MAX 7.531434e+11
Tally is -59
TIMESTEP 19615 / EPSILON 0.9627315000000172 / ACTION 1 / REWARD 0 / Q_MAX 1.744029e+12
Tally is -59
TIMESTEP 19616 / EPSILON 0.9627296000000172 / ACTION 1 / REWARD 0 / Q_MAX 1.278320e+12
Tally is -59
TIMESTEP 19617 / EPSILON 0.9627277000000172 / ACTION 0 / REWARD 0 / Q_MAX 7.750280e+11
Tally is -59
TIMESTEP 19618 / EPSILON 0.9627258000000172 / ACTION 1 / REWARD 0 / Q_MAX 2.042639e+11
Tally is -59
TIMESTEP 19619 / EPSILON 0.9627239000000172 / ACTION 1 / REWARD 0 / Q_MAX 1.138424e+12
Tally is -59
TIMESTEP 19620 / EPSILON 0.9627220000000172 / ACTION 0 / REWARD 0 / Q_MAX 1.470811e+12
Tally is -59


TIMESTEP 19722 / EPSILON 0.9625282000000173 / ACTION 0 / REWARD 0 / Q_MAX 2.487335e+12
Tally is -60
TIMESTEP 19723 / EPSILON 0.9625263000000173 / ACTION 2 / REWARD 0 / Q_MAX 8.931879e+11
Tally is -60
TIMESTEP 19724 / EPSILON 0.9625244000000173 / ACTION 1 / REWARD 0 / Q_MAX 3.224983e+12
Tally is -60
TIMESTEP 19725 / EPSILON 0.9625225000000173 / ACTION 2 / REWARD 0 / Q_MAX 1.454619e+12
Tally is -60
TIMESTEP 19726 / EPSILON 0.9625206000000173 / ACTION 1 / REWARD 0 / Q_MAX 2.321249e+12
Tally is -60
TIMESTEP 19727 / EPSILON 0.9625187000000173 / ACTION 2 / REWARD 0 / Q_MAX 2.469620e+12
Tally is -60
TIMESTEP 19728 / EPSILON 0.9625168000000173 / ACTION 0 / REWARD 0 / Q_MAX 3.424300e+12
Tally is -60
TIMESTEP 19729 / EPSILON 0.9625149000000173 / ACTION 1 / REWARD 0 / Q_MAX 1.505922e+12
Tally is -60
TIMESTEP 19730 / EPSILON 0.9625130000000173 / ACTION 0 / REWARD 0 / Q_MAX 9.757810e+11
Tally is -60
TIMESTEP 19731 / EPSILON 0.9625111000000173 / ACTION 2 / REWARD 0 / Q_MAX 1.053027e+12
Tally is -60


Tally is -60
TIMESTEP 19833 / EPSILON 0.9623173000000174 / ACTION 2 / REWARD 0 / Q_MAX 1.564629e+12
Tally is -60
TIMESTEP 19834 / EPSILON 0.9623154000000174 / ACTION 0 / REWARD 0 / Q_MAX 1.399536e+12
Tally is -60
TIMESTEP 19835 / EPSILON 0.9623135000000174 / ACTION 1 / REWARD 0 / Q_MAX 1.710345e+12
Tally is -60
TIMESTEP 19836 / EPSILON 0.9623116000000174 / ACTION 1 / REWARD 0 / Q_MAX 1.805751e+12
Tally is -60
TIMESTEP 19837 / EPSILON 0.9623097000000174 / ACTION 2 / REWARD 0 / Q_MAX 2.100586e+12
Tally is -60
TIMESTEP 19838 / EPSILON 0.9623078000000174 / ACTION 2 / REWARD 0 / Q_MAX 2.184105e+12
Tally is -60
TIMESTEP 19839 / EPSILON 0.9623059000000174 / ACTION 2 / REWARD 0 / Q_MAX 1.690743e+12
Tally is -60
TIMESTEP 19840 / EPSILON 0.9623040000000174 / ACTION 0 / REWARD 0 / Q_MAX 1.164647e+12
Tally is -60
TIMESTEP 19841 / EPSILON 0.9623021000000174 / ACTION 2 / REWARD 0 / Q_MAX 1.239005e+12
Tally is -60
TIMESTEP 19842 / EPSILON 0.9623002000000174 / ACTION 1 / REWARD 0 / Q_MAX 1.578273e+12


Tally is -61
TIMESTEP 19946 / EPSILON 0.9621026000000175 / ACTION 2 / REWARD 0 / Q_MAX 3.109793e+12
Tally is -61
TIMESTEP 19947 / EPSILON 0.9621007000000175 / ACTION 1 / REWARD 0 / Q_MAX 2.755573e+12
Tally is -61
TIMESTEP 19948 / EPSILON 0.9620988000000175 / ACTION 0 / REWARD 0 / Q_MAX 1.771502e+12
Tally is -61
TIMESTEP 19949 / EPSILON 0.9620969000000175 / ACTION 1 / REWARD 0 / Q_MAX 3.638059e+12
Tally is -61
TIMESTEP 19950 / EPSILON 0.9620950000000175 / ACTION 0 / REWARD 0 / Q_MAX 3.678483e+12
Tally is -61
TIMESTEP 19951 / EPSILON 0.9620931000000175 / ACTION 0 / REWARD 0 / Q_MAX 1.817982e+12
Tally is -61
TIMESTEP 19952 / EPSILON 0.9620912000000175 / ACTION 2 / REWARD 0 / Q_MAX 3.121191e+12
Tally is -61
TIMESTEP 19953 / EPSILON 0.9620893000000175 / ACTION 0 / REWARD 0 / Q_MAX 4.760306e+12
Tally is -61
TIMESTEP 19954 / EPSILON 0.9620874000000175 / ACTION 2 / REWARD 0 / Q_MAX 3.381883e+12
Tally is -61
TIMESTEP 19955 / EPSILON 0.9620855000000175 / ACTION 0 / REWARD 0 / Q_MAX 3.566169e+12


Tally is -61
TIMESTEP 20038 / EPSILON 0.9619278000000175 / ACTION 2 / REWARD 0 / Q_MAX 2.478721e+11
Tally is -61
TIMESTEP 20039 / EPSILON 0.9619259000000175 / ACTION 1 / REWARD 0 / Q_MAX 2.312912e+11
Tally is -61
TIMESTEP 20040 / EPSILON 0.9619240000000175 / ACTION 2 / REWARD 0 / Q_MAX -4.301638e+11
Tally is -61
TIMESTEP 20041 / EPSILON 0.9619221000000175 / ACTION 2 / REWARD 0 / Q_MAX 3.843776e+11
Tally is -61
TIMESTEP 20042 / EPSILON 0.9619202000000175 / ACTION 2 / REWARD 0 / Q_MAX 9.130032e+11
Tally is -61
TIMESTEP 20043 / EPSILON 0.9619183000000175 / ACTION 0 / REWARD 0 / Q_MAX 2.668889e+11
Tally is -61
TIMESTEP 20044 / EPSILON 0.9619164000000175 / ACTION 2 / REWARD 0 / Q_MAX 5.135803e+11
Tally is -61
TIMESTEP 20045 / EPSILON 0.9619145000000175 / ACTION 1 / REWARD 0 / Q_MAX 9.263268e+11
Tally is -61
TIMESTEP 20046 / EPSILON 0.9619126000000175 / ACTION 0 / REWARD 0 / Q_MAX 6.533132e+11
Tally is -61
TIMESTEP 20047 / EPSILON 0.9619107000000175 / ACTION 0 / REWARD 0 / Q_MAX 1.589509e+12

TIMESTEP 20130 / EPSILON 0.9617530000000176 / ACTION 2 / REWARD 0 / Q_MAX 4.163922e+12
Tally is -61
TIMESTEP 20131 / EPSILON 0.9617511000000176 / ACTION 1 / REWARD 0 / Q_MAX 3.833398e+12
Tally is -61
TIMESTEP 20132 / EPSILON 0.9617492000000176 / ACTION 0 / REWARD 0 / Q_MAX 4.046683e+12
Tally is -61
TIMESTEP 20133 / EPSILON 0.9617473000000176 / ACTION 1 / REWARD 0 / Q_MAX 2.680096e+12
Tally is -61
TIMESTEP 20134 / EPSILON 0.9617454000000176 / ACTION 0 / REWARD 0 / Q_MAX 4.623584e+12
Tally is -61
TIMESTEP 20135 / EPSILON 0.9617435000000176 / ACTION 1 / REWARD 0 / Q_MAX 2.335847e+12
Tally is -61
TIMESTEP 20136 / EPSILON 0.9617416000000176 / ACTION 2 / REWARD 0 / Q_MAX 3.176117e+12
Tally is -61
TIMESTEP 20137 / EPSILON 0.9617397000000176 / ACTION 2 / REWARD 0 / Q_MAX 3.059167e+12
Tally is -61
TIMESTEP 20138 / EPSILON 0.9617378000000176 / ACTION 0 / REWARD 0 / Q_MAX 3.479294e+12
Tally is -61
TIMESTEP 20139 / EPSILON 0.9617359000000176 / ACTION 2 / REWARD 0 / Q_MAX 3.958950e+12
Tally is -61


Tally is -62
TIMESTEP 20232 / EPSILON 0.9615592000000177 / ACTION 2 / REWARD 0 / Q_MAX 1.631090e+12
Tally is -62
TIMESTEP 20233 / EPSILON 0.9615573000000177 / ACTION 1 / REWARD 0 / Q_MAX 1.374776e+12
Tally is -62
TIMESTEP 20234 / EPSILON 0.9615554000000177 / ACTION 0 / REWARD 0 / Q_MAX 1.077458e+12
Tally is -62
TIMESTEP 20235 / EPSILON 0.9615535000000177 / ACTION 1 / REWARD 0 / Q_MAX 1.608211e+12
Tally is -62
TIMESTEP 20236 / EPSILON 0.9615516000000177 / ACTION 1 / REWARD 0 / Q_MAX 1.707810e+12
Tally is -62
TIMESTEP 20237 / EPSILON 0.9615497000000177 / ACTION 1 / REWARD 0 / Q_MAX 2.186700e+12
Tally is -62
TIMESTEP 20238 / EPSILON 0.9615478000000177 / ACTION 0 / REWARD 0 / Q_MAX 1.458463e+12
Tally is -62
TIMESTEP 20239 / EPSILON 0.9615459000000177 / ACTION 2 / REWARD 0 / Q_MAX 5.435347e+11
Tally is -62
TIMESTEP 20240 / EPSILON 0.9615440000000177 / ACTION 2 / REWARD 0 / Q_MAX 6.159560e+11
Tally is -62
TIMESTEP 20241 / EPSILON 0.9615421000000177 / ACTION 0 / REWARD 0 / Q_MAX 9.795020e+10


TIMESTEP 20346 / EPSILON 0.9613426000000178 / ACTION 2 / REWARD 0 / Q_MAX 3.823232e+12
Tally is -62
TIMESTEP 20347 / EPSILON 0.9613407000000178 / ACTION 1 / REWARD 0 / Q_MAX 2.753541e+12
Tally is -62
TIMESTEP 20348 / EPSILON 0.9613388000000178 / ACTION 1 / REWARD 0 / Q_MAX 2.362736e+12
Tally is -62
TIMESTEP 20349 / EPSILON 0.9613369000000178 / ACTION 1 / REWARD 0 / Q_MAX 3.449496e+12
Tally is -62
TIMESTEP 20350 / EPSILON 0.9613350000000178 / ACTION 0 / REWARD 0 / Q_MAX 3.826332e+12
Tally is -62
TIMESTEP 20351 / EPSILON 0.9613331000000178 / ACTION 1 / REWARD 0 / Q_MAX 2.955972e+12
Tally is -62
TIMESTEP 20352 / EPSILON 0.9613312000000178 / ACTION 0 / REWARD 0 / Q_MAX 4.236985e+12
Tally is -62
TIMESTEP 20353 / EPSILON 0.9613293000000178 / ACTION 0 / REWARD 0 / Q_MAX 4.949778e+12
Tally is -62
TIMESTEP 20354 / EPSILON 0.9613274000000178 / ACTION 1 / REWARD 0 / Q_MAX 3.124652e+12
Tally is -62
TIMESTEP 20355 / EPSILON 0.9613255000000178 / ACTION 0 / REWARD 0 / Q_MAX 2.939175e+12
Tally is -62


TIMESTEP 20459 / EPSILON 0.9611279000000179 / ACTION 1 / REWARD 0 / Q_MAX 6.412614e+11
Tally is -63
TIMESTEP 20460 / EPSILON 0.9611260000000179 / ACTION 2 / REWARD 0 / Q_MAX -1.238472e+11
Tally is -63
TIMESTEP 20461 / EPSILON 0.9611241000000179 / ACTION 2 / REWARD 0 / Q_MAX -2.126871e+11
Tally is -63
TIMESTEP 20462 / EPSILON 0.9611222000000179 / ACTION 1 / REWARD 0 / Q_MAX 2.717295e+12
Tally is -63
TIMESTEP 20463 / EPSILON 0.9611203000000179 / ACTION 2 / REWARD 0 / Q_MAX 3.227527e+12
Tally is -63
TIMESTEP 20464 / EPSILON 0.9611184000000179 / ACTION 2 / REWARD 0 / Q_MAX 2.573517e+12
Tally is -63
TIMESTEP 20465 / EPSILON 0.9611165000000179 / ACTION 0 / REWARD 0 / Q_MAX 1.489220e+12
Tally is -63
TIMESTEP 20466 / EPSILON 0.9611146000000179 / ACTION 1 / REWARD 0 / Q_MAX 1.485945e+12
Tally is -63
TIMESTEP 20467 / EPSILON 0.9611127000000179 / ACTION 2 / REWARD 0 / Q_MAX 3.015494e+12
Tally is -63
TIMESTEP 20468 / EPSILON 0.9611108000000179 / ACTION 1 / REWARD 0 / Q_MAX 2.578032e+12
Tally is -6

Tally is -63
TIMESTEP 20571 / EPSILON 0.960915100000018 / ACTION 2 / REWARD 0 / Q_MAX 6.960655e+11
Tally is -63
TIMESTEP 20572 / EPSILON 0.960913200000018 / ACTION 2 / REWARD 0 / Q_MAX 1.985172e+12
Tally is -63
TIMESTEP 20573 / EPSILON 0.960911300000018 / ACTION 1 / REWARD 0 / Q_MAX 1.971105e+12
Tally is -63
TIMESTEP 20574 / EPSILON 0.960909400000018 / ACTION 1 / REWARD 0 / Q_MAX 2.526117e+12
Tally is -63
TIMESTEP 20575 / EPSILON 0.960907500000018 / ACTION 1 / REWARD 0 / Q_MAX 2.275364e+12
Tally is -63
TIMESTEP 20576 / EPSILON 0.960905600000018 / ACTION 1 / REWARD 0 / Q_MAX 1.446745e+12
Tally is -63
TIMESTEP 20577 / EPSILON 0.960903700000018 / ACTION 1 / REWARD 0 / Q_MAX 1.634756e+12
Tally is -63
TIMESTEP 20578 / EPSILON 0.960901800000018 / ACTION 0 / REWARD 0 / Q_MAX 1.211996e+12
Tally is -63
TIMESTEP 20579 / EPSILON 0.960899900000018 / ACTION 1 / REWARD 0 / Q_MAX 1.092980e+12
Tally is -63
TIMESTEP 20580 / EPSILON 0.960898000000018 / ACTION 2 / REWARD 0 / Q_MAX 1.421871e+12
Tally is -

Tally is -64
TIMESTEP 20680 / EPSILON 0.9607080000000181 / ACTION 1 / REWARD 0 / Q_MAX 3.009438e+12
Tally is -64
TIMESTEP 20681 / EPSILON 0.9607061000000181 / ACTION 0 / REWARD 0 / Q_MAX 4.139713e+12
Tally is -64
TIMESTEP 20682 / EPSILON 0.9607042000000181 / ACTION 2 / REWARD 0 / Q_MAX 4.343430e+12
Tally is -64
TIMESTEP 20683 / EPSILON 0.9607023000000181 / ACTION 2 / REWARD 0 / Q_MAX 3.878095e+12
Tally is -64
TIMESTEP 20684 / EPSILON 0.9607004000000181 / ACTION 1 / REWARD 0 / Q_MAX 3.748077e+12
Tally is -64
TIMESTEP 20685 / EPSILON 0.9606985000000181 / ACTION 0 / REWARD 0 / Q_MAX 2.780873e+12
Tally is -64
TIMESTEP 20686 / EPSILON 0.9606966000000181 / ACTION 2 / REWARD 0 / Q_MAX 1.744413e+12
Tally is -64
TIMESTEP 20687 / EPSILON 0.9606947000000181 / ACTION 2 / REWARD 0 / Q_MAX 2.582915e+12
Tally is -64
TIMESTEP 20688 / EPSILON 0.9606928000000181 / ACTION 0 / REWARD 0 / Q_MAX 2.660654e+12
Tally is -64
TIMESTEP 20689 / EPSILON 0.9606909000000181 / ACTION 0 / REWARD 0 / Q_MAX 2.262480e+12


TIMESTEP 20790 / EPSILON 0.9604990000000182 / ACTION 2 / REWARD 0 / Q_MAX 6.365083e+11
Tally is -64
TIMESTEP 20791 / EPSILON 0.9604971000000182 / ACTION 1 / REWARD 0 / Q_MAX 5.456386e+11
Tally is -64
TIMESTEP 20792 / EPSILON 0.9604952000000182 / ACTION 2 / REWARD 0 / Q_MAX 9.851462e+11
Tally is -64
TIMESTEP 20793 / EPSILON 0.9604933000000182 / ACTION 2 / REWARD 0 / Q_MAX 1.331169e+12
Tally is -64
TIMESTEP 20794 / EPSILON 0.9604914000000182 / ACTION 0 / REWARD 0 / Q_MAX 1.166717e+12
Tally is -64
TIMESTEP 20795 / EPSILON 0.9604895000000182 / ACTION 2 / REWARD 0 / Q_MAX 1.000945e+12
Tally is -64
TIMESTEP 20796 / EPSILON 0.9604876000000182 / ACTION 0 / REWARD 0 / Q_MAX 8.105696e+11
Tally is -64
TIMESTEP 20797 / EPSILON 0.9604857000000182 / ACTION 0 / REWARD 0 / Q_MAX 7.826905e+11
Tally is -64
TIMESTEP 20798 / EPSILON 0.9604838000000182 / ACTION 1 / REWARD 0 / Q_MAX 7.862109e+11
Tally is -64
TIMESTEP 20799 / EPSILON 0.9604819000000182 / ACTION 2 / REWARD 0 / Q_MAX 8.144745e+11
Tally is -64


TIMESTEP 20904 / EPSILON 0.9602824000000183 / ACTION 2 / REWARD 0 / Q_MAX 3.387014e+12
Tally is -64
TIMESTEP 20905 / EPSILON 0.9602805000000183 / ACTION 1 / REWARD 0 / Q_MAX 2.482611e+12
Tally is -64
TIMESTEP 20906 / EPSILON 0.9602786000000183 / ACTION 2 / REWARD 0 / Q_MAX 2.321474e+12
Tally is -64
TIMESTEP 20907 / EPSILON 0.9602767000000183 / ACTION 2 / REWARD 0 / Q_MAX 1.703110e+12
Tally is -64
TIMESTEP 20908 / EPSILON 0.9602748000000183 / ACTION 0 / REWARD 0 / Q_MAX 2.046787e+12
Tally is -64
TIMESTEP 20909 / EPSILON 0.9602729000000183 / ACTION 1 / REWARD 0 / Q_MAX 2.108893e+12
Tally is -64
TIMESTEP 20910 / EPSILON 0.9602710000000183 / ACTION 0 / REWARD 0 / Q_MAX 1.237897e+12
Tally is -64
TIMESTEP 20911 / EPSILON 0.9602691000000183 / ACTION 2 / REWARD 0 / Q_MAX 1.806199e+12
Tally is -64
TIMESTEP 20912 / EPSILON 0.9602672000000183 / ACTION 2 / REWARD 0 / Q_MAX 1.330496e+12
Tally is -64
TIMESTEP 20913 / EPSILON 0.9602653000000183 / ACTION 1 / REWARD 0 / Q_MAX 1.869471e+12
Tally is -64


Tally is -65
TIMESTEP 21016 / EPSILON 0.9600696000000184 / ACTION 2 / REWARD 0 / Q_MAX 1.035508e+12
Tally is -65
TIMESTEP 21017 / EPSILON 0.9600677000000184 / ACTION 2 / REWARD 0 / Q_MAX 1.430035e+11
Tally is -65
TIMESTEP 21018 / EPSILON 0.9600658000000184 / ACTION 1 / REWARD 0 / Q_MAX 1.178021e+12
Tally is -65
TIMESTEP 21019 / EPSILON 0.9600639000000184 / ACTION 1 / REWARD 0 / Q_MAX 2.922496e+12
Tally is -65
TIMESTEP 21020 / EPSILON 0.9600620000000184 / ACTION 0 / REWARD 0 / Q_MAX 9.224340e+11
Tally is -65
TIMESTEP 21021 / EPSILON 0.9600601000000184 / ACTION 1 / REWARD 0 / Q_MAX 1.775908e+12
Tally is -65
TIMESTEP 21022 / EPSILON 0.9600582000000184 / ACTION 1 / REWARD 0 / Q_MAX 1.292115e+12
Tally is -65
TIMESTEP 21023 / EPSILON 0.9600563000000184 / ACTION 1 / REWARD 0 / Q_MAX 1.710366e+12
Tally is -65
TIMESTEP 21024 / EPSILON 0.9600544000000184 / ACTION 1 / REWARD 0 / Q_MAX 2.569462e+12
Tally is -65
TIMESTEP 21025 / EPSILON 0.9600525000000184 / ACTION 2 / REWARD 0 / Q_MAX 2.341542e+12


TIMESTEP 21128 / EPSILON 0.9598568000000185 / ACTION 0 / REWARD 0 / Q_MAX 2.304875e+12
Tally is -65
TIMESTEP 21129 / EPSILON 0.9598549000000185 / ACTION 2 / REWARD 0 / Q_MAX 2.937539e+12
Tally is -65
TIMESTEP 21130 / EPSILON 0.9598530000000185 / ACTION 2 / REWARD 0 / Q_MAX 5.095343e+12
Tally is -65
TIMESTEP 21131 / EPSILON 0.9598511000000185 / ACTION 1 / REWARD 0 / Q_MAX 4.090554e+12
Tally is -65
TIMESTEP 21132 / EPSILON 0.9598492000000185 / ACTION 2 / REWARD 0 / Q_MAX 4.768168e+12
Tally is -65
TIMESTEP 21133 / EPSILON 0.9598473000000185 / ACTION 1 / REWARD 0 / Q_MAX 3.307402e+12
Tally is -65
TIMESTEP 21134 / EPSILON 0.9598454000000185 / ACTION 0 / REWARD 0 / Q_MAX 1.310083e+12
Tally is -65
TIMESTEP 21135 / EPSILON 0.9598435000000185 / ACTION 0 / REWARD 0 / Q_MAX 2.077506e+12
Tally is -65
TIMESTEP 21136 / EPSILON 0.9598416000000185 / ACTION 0 / REWARD 0 / Q_MAX 2.486590e+12
Tally is -65
TIMESTEP 21137 / EPSILON 0.9598397000000185 / ACTION 2 / REWARD 0 / Q_MAX 3.069605e+12
Tally is -65


Tally is -66
TIMESTEP 21242 / EPSILON 0.9596402000000186 / ACTION 0 / REWARD 0 / Q_MAX 4.038498e+12
Tally is -66
TIMESTEP 21243 / EPSILON 0.9596383000000186 / ACTION 0 / REWARD 0 / Q_MAX 4.501292e+12
Tally is -66
TIMESTEP 21244 / EPSILON 0.9596364000000186 / ACTION 0 / REWARD 0 / Q_MAX 2.201217e+12
Tally is -66
TIMESTEP 21245 / EPSILON 0.9596345000000186 / ACTION 1 / REWARD 0 / Q_MAX 1.436153e+12
Tally is -66
TIMESTEP 21246 / EPSILON 0.9596326000000186 / ACTION 1 / REWARD 0 / Q_MAX 2.891236e+12
Tally is -66
TIMESTEP 21247 / EPSILON 0.9596307000000186 / ACTION 0 / REWARD 0 / Q_MAX 5.241005e+12
Tally is -66
TIMESTEP 21248 / EPSILON 0.9596288000000186 / ACTION 0 / REWARD 0 / Q_MAX 5.443522e+12
Tally is -66
TIMESTEP 21249 / EPSILON 0.9596269000000186 / ACTION 0 / REWARD 0 / Q_MAX 5.187738e+12
Tally is -66
TIMESTEP 21250 / EPSILON 0.9596250000000186 / ACTION 0 / REWARD 0 / Q_MAX 8.161052e+11
Tally is -66
TIMESTEP 21251 / EPSILON 0.9596231000000186 / ACTION 0 / REWARD 0 / Q_MAX 1.273752e+12


TIMESTEP 21344 / EPSILON 0.9594464000000187 / ACTION 0 / REWARD 0 / Q_MAX 6.868979e+11
Tally is -66
TIMESTEP 21345 / EPSILON 0.9594445000000187 / ACTION 1 / REWARD 0 / Q_MAX 1.289205e+12
Tally is -66
TIMESTEP 21346 / EPSILON 0.9594426000000187 / ACTION 1 / REWARD 0 / Q_MAX 3.921229e+12
Tally is -66
TIMESTEP 21347 / EPSILON 0.9594407000000187 / ACTION 0 / REWARD 0 / Q_MAX 3.317324e+12
Tally is -66
TIMESTEP 21348 / EPSILON 0.9594388000000187 / ACTION 1 / REWARD 0 / Q_MAX 2.259155e+12
Tally is -66
TIMESTEP 21349 / EPSILON 0.9594369000000187 / ACTION 2 / REWARD 0 / Q_MAX 1.581176e+12
Tally is -66
TIMESTEP 21350 / EPSILON 0.9594350000000187 / ACTION 0 / REWARD 0 / Q_MAX 2.132401e+12
Tally is -66
TIMESTEP 21351 / EPSILON 0.9594331000000187 / ACTION 0 / REWARD 0 / Q_MAX -3.409742e+11
Tally is -66
TIMESTEP 21352 / EPSILON 0.9594312000000187 / ACTION 0 / REWARD 0 / Q_MAX 1.635851e+12
Tally is -66
TIMESTEP 21353 / EPSILON 0.9594293000000187 / ACTION 0 / REWARD 0 / Q_MAX 1.317951e+12
Tally is -66

TIMESTEP 21437 / EPSILON 0.9592697000000188 / ACTION 0 / REWARD 0 / Q_MAX 2.130170e+12
Tally is -67
TIMESTEP 21438 / EPSILON 0.9592678000000188 / ACTION 2 / REWARD 0 / Q_MAX 1.901126e+12
Tally is -67
TIMESTEP 21439 / EPSILON 0.9592659000000188 / ACTION 2 / REWARD 0 / Q_MAX 2.952491e+12
Tally is -67
TIMESTEP 21440 / EPSILON 0.9592640000000188 / ACTION 0 / REWARD 0 / Q_MAX 3.295061e+12
Tally is -67
TIMESTEP 21441 / EPSILON 0.9592621000000188 / ACTION 2 / REWARD 0 / Q_MAX 2.380068e+12
Tally is -67
TIMESTEP 21442 / EPSILON 0.9592602000000188 / ACTION 2 / REWARD 0 / Q_MAX 1.341541e+12
Tally is -67
TIMESTEP 21443 / EPSILON 0.9592583000000188 / ACTION 1 / REWARD 0 / Q_MAX 1.237921e+12
Tally is -67
TIMESTEP 21444 / EPSILON 0.9592564000000188 / ACTION 2 / REWARD 0 / Q_MAX 3.398281e+12
Tally is -67
TIMESTEP 21445 / EPSILON 0.9592545000000188 / ACTION 0 / REWARD 0 / Q_MAX 4.319683e+12
Tally is -67
TIMESTEP 21446 / EPSILON 0.9592526000000188 / ACTION 0 / REWARD 0 / Q_MAX 3.534293e+12
Tally is -67


TIMESTEP 21549 / EPSILON 0.9590569000000189 / ACTION 2 / REWARD 0 / Q_MAX 6.347990e+11
Tally is -67
TIMESTEP 21550 / EPSILON 0.9590550000000189 / ACTION 2 / REWARD 0 / Q_MAX -1.477093e+11
Tally is -67
TIMESTEP 21551 / EPSILON 0.9590531000000189 / ACTION 1 / REWARD 0 / Q_MAX 9.547324e+11
Tally is -67
TIMESTEP 21552 / EPSILON 0.9590512000000189 / ACTION 0 / REWARD 0 / Q_MAX 7.939732e+11
Tally is -67
TIMESTEP 21553 / EPSILON 0.9590493000000189 / ACTION 0 / REWARD 0 / Q_MAX 3.584765e+11
Tally is -67
TIMESTEP 21554 / EPSILON 0.9590474000000189 / ACTION 1 / REWARD 0 / Q_MAX 7.338407e+11
Tally is -67
TIMESTEP 21555 / EPSILON 0.9590455000000189 / ACTION 0 / REWARD 0 / Q_MAX 1.669062e+12
Tally is -67
TIMESTEP 21556 / EPSILON 0.9590436000000189 / ACTION 2 / REWARD 0 / Q_MAX 1.057077e+12
Tally is -67
TIMESTEP 21557 / EPSILON 0.9590417000000189 / ACTION 2 / REWARD 0 / Q_MAX 8.151609e+11
Tally is -67
TIMESTEP 21558 / EPSILON 0.9590398000000189 / ACTION 1 / REWARD 0 / Q_MAX 1.072647e+12
Tally is -67

Tally is -67
TIMESTEP 21660 / EPSILON 0.958846000000019 / ACTION 2 / REWARD 0 / Q_MAX 4.716751e+11
Tally is -67
TIMESTEP 21661 / EPSILON 0.958844100000019 / ACTION 2 / REWARD 0 / Q_MAX 4.013901e+11
Tally is -67
TIMESTEP 21662 / EPSILON 0.958842200000019 / ACTION 2 / REWARD 0 / Q_MAX -1.455851e+11
Tally is -67
TIMESTEP 21663 / EPSILON 0.958840300000019 / ACTION 2 / REWARD 0 / Q_MAX -1.236625e+12
Tally is -67
TIMESTEP 21664 / EPSILON 0.958838400000019 / ACTION 2 / REWARD 0 / Q_MAX -1.135982e+12
Tally is -67
TIMESTEP 21665 / EPSILON 0.958836500000019 / ACTION 0 / REWARD 0 / Q_MAX -1.502330e+12
Tally is -67
TIMESTEP 21666 / EPSILON 0.958834600000019 / ACTION 0 / REWARD 0 / Q_MAX -1.396145e+12
Tally is -67
TIMESTEP 21667 / EPSILON 0.958832700000019 / ACTION 1 / REWARD 0 / Q_MAX -2.900707e+11
Tally is -67
TIMESTEP 21668 / EPSILON 0.958830800000019 / ACTION 2 / REWARD 0 / Q_MAX 5.693840e+11
Tally is -67
TIMESTEP 21669 / EPSILON 0.958828900000019 / ACTION 0 / REWARD 0 / Q_MAX -1.064776e+10
Tal

TIMESTEP 21752 / EPSILON 0.958671200000019 / ACTION 2 / REWARD 0 / Q_MAX 2.825473e+12
Tally is -68
TIMESTEP 21753 / EPSILON 0.958669300000019 / ACTION 0 / REWARD 0 / Q_MAX 2.750893e+12
Tally is -68
TIMESTEP 21754 / EPSILON 0.958667400000019 / ACTION 2 / REWARD 0 / Q_MAX 3.206123e+12
Tally is -68
TIMESTEP 21755 / EPSILON 0.958665500000019 / ACTION 0 / REWARD 0 / Q_MAX 3.749038e+12
Tally is -68
TIMESTEP 21756 / EPSILON 0.958663600000019 / ACTION 1 / REWARD 0 / Q_MAX 3.750150e+12
Tally is -68
TIMESTEP 21757 / EPSILON 0.958661700000019 / ACTION 1 / REWARD 0 / Q_MAX 3.877751e+12
Tally is -68
TIMESTEP 21758 / EPSILON 0.958659800000019 / ACTION 0 / REWARD 0 / Q_MAX 2.499629e+12
Tally is -68
TIMESTEP 21759 / EPSILON 0.958657900000019 / ACTION 2 / REWARD 0 / Q_MAX 2.550676e+12
Tally is -68
TIMESTEP 21760 / EPSILON 0.958656000000019 / ACTION 0 / REWARD 0 / Q_MAX 1.614192e+12
Tally is -68
TIMESTEP 21761 / EPSILON 0.958654100000019 / ACTION 2 / REWARD 0 / Q_MAX 9.284874e+11
Tally is -68
TIMESTEP 2

TIMESTEP 21842 / EPSILON 0.9585002000000191 / ACTION 2 / REWARD 0 / Q_MAX -4.628184e+11
Tally is -68
TIMESTEP 21843 / EPSILON 0.9584983000000191 / ACTION 1 / REWARD 0 / Q_MAX 4.783715e+10
Tally is -68
TIMESTEP 21844 / EPSILON 0.9584964000000191 / ACTION 0 / REWARD 0 / Q_MAX 8.828703e+11
Tally is -68
TIMESTEP 21845 / EPSILON 0.9584945000000191 / ACTION 2 / REWARD 0 / Q_MAX 2.333696e+12
Tally is -68
TIMESTEP 21846 / EPSILON 0.9584926000000191 / ACTION 1 / REWARD 0 / Q_MAX 2.705319e+12
Tally is -68
TIMESTEP 21847 / EPSILON 0.9584907000000191 / ACTION 1 / REWARD 0 / Q_MAX 1.063681e+12
Tally is -68
TIMESTEP 21848 / EPSILON 0.9584888000000191 / ACTION 1 / REWARD 0 / Q_MAX 6.278046e+11
Tally is -68
TIMESTEP 21849 / EPSILON 0.9584869000000191 / ACTION 2 / REWARD 0 / Q_MAX 1.001438e+12
Tally is -68
TIMESTEP 21850 / EPSILON 0.9584850000000191 / ACTION 2 / REWARD 0 / Q_MAX 1.908317e+12
Tally is -68
TIMESTEP 21851 / EPSILON 0.9584831000000191 / ACTION 0 / REWARD 0 / Q_MAX -1.540524e+11
Tally is -6

Tally is -68
TIMESTEP 21932 / EPSILON 0.9583292000000192 / ACTION 0 / REWARD 0 / Q_MAX -1.009099e+12
Tally is -68
TIMESTEP 21933 / EPSILON 0.9583273000000192 / ACTION 1 / REWARD 0 / Q_MAX -3.217661e+11
Tally is -68
TIMESTEP 21934 / EPSILON 0.9583254000000192 / ACTION 2 / REWARD 0 / Q_MAX -9.149567e+11
Tally is -69
TIMESTEP 21935 / EPSILON 0.9583235000000192 / ACTION 1 / REWARD -1 / Q_MAX -7.224338e+11
Tally is -69
TIMESTEP 21936 / EPSILON 0.9583216000000192 / ACTION 0 / REWARD 0 / Q_MAX -2.358136e+11
Tally is -69
TIMESTEP 21937 / EPSILON 0.9583197000000192 / ACTION 2 / REWARD 0 / Q_MAX 5.947898e+11
Tally is -69
TIMESTEP 21938 / EPSILON 0.9583178000000192 / ACTION 0 / REWARD 0 / Q_MAX 9.344834e+11
Tally is -69
TIMESTEP 21939 / EPSILON 0.9583159000000192 / ACTION 1 / REWARD 0 / Q_MAX 2.189879e+10
Tally is -69
TIMESTEP 21940 / EPSILON 0.9583140000000192 / ACTION 0 / REWARD 0 / Q_MAX 4.956332e+11
Tally is -69
TIMESTEP 21941 / EPSILON 0.9583121000000192 / ACTION 2 / REWARD 0 / Q_MAX 6.94796

TIMESTEP 22017 / EPSILON 0.9581677000000193 / ACTION 1 / REWARD 0 / Q_MAX 1.814330e+12
Tally is -69
TIMESTEP 22018 / EPSILON 0.9581658000000193 / ACTION 1 / REWARD 0 / Q_MAX 2.465787e+12
Tally is -69
TIMESTEP 22019 / EPSILON 0.9581639000000193 / ACTION 2 / REWARD 0 / Q_MAX 2.571397e+12
Tally is -69
TIMESTEP 22020 / EPSILON 0.9581620000000193 / ACTION 1 / REWARD 0 / Q_MAX 1.367077e+12
Tally is -69
TIMESTEP 22021 / EPSILON 0.9581601000000193 / ACTION 0 / REWARD 0 / Q_MAX 8.392025e+11
Tally is -69
TIMESTEP 22022 / EPSILON 0.9581582000000193 / ACTION 0 / REWARD 0 / Q_MAX 1.889719e+12
Tally is -69
TIMESTEP 22023 / EPSILON 0.9581563000000193 / ACTION 2 / REWARD 0 / Q_MAX 2.451072e+12
Tally is -69
TIMESTEP 22024 / EPSILON 0.9581544000000193 / ACTION 2 / REWARD 0 / Q_MAX 1.204790e+12
Tally is -69
TIMESTEP 22025 / EPSILON 0.9581525000000193 / ACTION 2 / REWARD 0 / Q_MAX -6.017833e+11
Tally is -69
TIMESTEP 22026 / EPSILON 0.9581506000000193 / ACTION 1 / REWARD 0 / Q_MAX 3.914489e+11
Tally is -69

Tally is -69
TIMESTEP 22128 / EPSILON 0.9579568000000194 / ACTION 2 / REWARD 0 / Q_MAX 1.888027e+12
Tally is -69
TIMESTEP 22129 / EPSILON 0.9579549000000194 / ACTION 1 / REWARD 0 / Q_MAX 3.779586e+12
Tally is -69
TIMESTEP 22130 / EPSILON 0.9579530000000194 / ACTION 1 / REWARD 0 / Q_MAX 1.797592e+12
Tally is -69
TIMESTEP 22131 / EPSILON 0.9579511000000194 / ACTION 2 / REWARD 0 / Q_MAX 3.075275e+12
Tally is -69
TIMESTEP 22132 / EPSILON 0.9579492000000194 / ACTION 1 / REWARD 0 / Q_MAX 2.891807e+12
Tally is -69
TIMESTEP 22133 / EPSILON 0.9579473000000194 / ACTION 2 / REWARD 0 / Q_MAX 2.763894e+12
Tally is -69
TIMESTEP 22134 / EPSILON 0.9579454000000194 / ACTION 2 / REWARD 0 / Q_MAX 2.846818e+12
Tally is -69
TIMESTEP 22135 / EPSILON 0.9579435000000194 / ACTION 1 / REWARD 0 / Q_MAX 2.545563e+12
Tally is -69
TIMESTEP 22136 / EPSILON 0.9579416000000194 / ACTION 0 / REWARD 0 / Q_MAX 8.270726e+11
Tally is -69
TIMESTEP 22137 / EPSILON 0.9579397000000194 / ACTION 1 / REWARD 0 / Q_MAX 1.106308e+12


TIMESTEP 22219 / EPSILON 0.9577839000000195 / ACTION 1 / REWARD 0 / Q_MAX 5.369099e+12
Tally is -69
TIMESTEP 22220 / EPSILON 0.9577820000000195 / ACTION 2 / REWARD 0 / Q_MAX 3.680050e+12
Tally is -69
TIMESTEP 22221 / EPSILON 0.9577801000000195 / ACTION 2 / REWARD 0 / Q_MAX 1.756313e+12
Tally is -69
TIMESTEP 22222 / EPSILON 0.9577782000000195 / ACTION 0 / REWARD 0 / Q_MAX 2.293956e+12
Tally is -69
TIMESTEP 22223 / EPSILON 0.9577763000000195 / ACTION 1 / REWARD 0 / Q_MAX 7.385762e+11
Tally is -69
TIMESTEP 22224 / EPSILON 0.9577744000000195 / ACTION 0 / REWARD 0 / Q_MAX 1.578958e+12
Tally is -69
TIMESTEP 22225 / EPSILON 0.9577725000000195 / ACTION 1 / REWARD 0 / Q_MAX 2.246933e+12
Tally is -69
TIMESTEP 22226 / EPSILON 0.9577706000000195 / ACTION 0 / REWARD 0 / Q_MAX 3.860604e+12
Tally is -69
TIMESTEP 22227 / EPSILON 0.9577687000000195 / ACTION 2 / REWARD 0 / Q_MAX 3.948694e+12
Tally is -69
TIMESTEP 22228 / EPSILON 0.9577668000000195 / ACTION 2 / REWARD 0 / Q_MAX 2.960916e+12
Tally is -69


TIMESTEP 22315 / EPSILON 0.9576015000000195 / ACTION 1 / REWARD 0 / Q_MAX -1.113206e+11
Tally is -69
TIMESTEP 22316 / EPSILON 0.9575996000000195 / ACTION 2 / REWARD 0 / Q_MAX 4.144395e+10
Tally is -69
TIMESTEP 22317 / EPSILON 0.9575977000000195 / ACTION 1 / REWARD 0 / Q_MAX -8.188616e+11
Tally is -69
TIMESTEP 22318 / EPSILON 0.9575958000000195 / ACTION 1 / REWARD 0 / Q_MAX 2.737877e+11
Tally is -69
TIMESTEP 22319 / EPSILON 0.9575939000000195 / ACTION 1 / REWARD 0 / Q_MAX -6.397896e+11
Tally is -69
TIMESTEP 22320 / EPSILON 0.9575920000000195 / ACTION 2 / REWARD 0 / Q_MAX 3.537916e+11
Tally is -69
TIMESTEP 22321 / EPSILON 0.9575901000000195 / ACTION 2 / REWARD 0 / Q_MAX 8.443285e+11
Tally is -69
TIMESTEP 22322 / EPSILON 0.9575882000000195 / ACTION 1 / REWARD 0 / Q_MAX 1.473961e+12
Tally is -69
TIMESTEP 22323 / EPSILON 0.9575863000000195 / ACTION 2 / REWARD 0 / Q_MAX 1.167062e+12
Tally is -69
TIMESTEP 22324 / EPSILON 0.9575844000000195 / ACTION 1 / REWARD 0 / Q_MAX 5.232608e+11
Tally is -

Tally is -69
TIMESTEP 22407 / EPSILON 0.9574267000000196 / ACTION 0 / REWARD 0 / Q_MAX 4.893318e+11
Tally is -69
TIMESTEP 22408 / EPSILON 0.9574248000000196 / ACTION 1 / REWARD 0 / Q_MAX 1.788172e+12
Tally is -69
TIMESTEP 22409 / EPSILON 0.9574229000000196 / ACTION 0 / REWARD 0 / Q_MAX 1.759655e+12
Tally is -69
TIMESTEP 22410 / EPSILON 0.9574210000000196 / ACTION 0 / REWARD 0 / Q_MAX 3.315928e+11
Tally is -69
TIMESTEP 22411 / EPSILON 0.9574191000000196 / ACTION 2 / REWARD 0 / Q_MAX 2.943490e+11
Tally is -69
TIMESTEP 22412 / EPSILON 0.9574172000000196 / ACTION 0 / REWARD 0 / Q_MAX -9.864302e+11
Tally is -69
TIMESTEP 22413 / EPSILON 0.9574153000000196 / ACTION 2 / REWARD 0 / Q_MAX -1.428373e+12
Tally is -69
TIMESTEP 22414 / EPSILON 0.9574134000000196 / ACTION 2 / REWARD 0 / Q_MAX -1.522150e+11
Tally is -69
TIMESTEP 22415 / EPSILON 0.9574115000000196 / ACTION 1 / REWARD 0 / Q_MAX 1.654556e+11
Tally is -69
TIMESTEP 22416 / EPSILON 0.9574096000000196 / ACTION 0 / REWARD 0 / Q_MAX 1.001552e+

Tally is -70
TIMESTEP 22498 / EPSILON 0.9572538000000197 / ACTION 1 / REWARD 0 / Q_MAX 1.522299e+12
Tally is -70
TIMESTEP 22499 / EPSILON 0.9572519000000197 / ACTION 2 / REWARD 0 / Q_MAX 7.938366e+11
Tally is -70
TIMESTEP 22500 / EPSILON 0.9572500000000197 / ACTION 0 / REWARD 0 / Q_MAX 1.275481e+12
Tally is -70
TIMESTEP 22501 / EPSILON 0.9572481000000197 / ACTION 1 / REWARD 0 / Q_MAX 2.157976e+12
Tally is -70
TIMESTEP 22502 / EPSILON 0.9572462000000197 / ACTION 1 / REWARD 0 / Q_MAX 1.475548e+12
Tally is -70
TIMESTEP 22503 / EPSILON 0.9572443000000197 / ACTION 0 / REWARD 0 / Q_MAX 1.084598e+12
Tally is -70
TIMESTEP 22504 / EPSILON 0.9572424000000197 / ACTION 2 / REWARD 0 / Q_MAX 2.282126e+12
Tally is -70
TIMESTEP 22505 / EPSILON 0.9572405000000197 / ACTION 1 / REWARD 0 / Q_MAX 1.375540e+12
Tally is -70
TIMESTEP 22506 / EPSILON 0.9572386000000197 / ACTION 0 / REWARD 0 / Q_MAX 5.368071e+11
Tally is -70
TIMESTEP 22507 / EPSILON 0.9572367000000197 / ACTION 2 / REWARD 0 / Q_MAX 1.766946e+12


Tally is -70
TIMESTEP 22585 / EPSILON 0.9570885000000198 / ACTION 1 / REWARD 0 / Q_MAX 2.115357e+12
Tally is -70
TIMESTEP 22586 / EPSILON 0.9570866000000198 / ACTION 0 / REWARD 0 / Q_MAX 2.482867e+12
Tally is -70
TIMESTEP 22587 / EPSILON 0.9570847000000198 / ACTION 2 / REWARD 0 / Q_MAX 8.351652e+11
Tally is -70
TIMESTEP 22588 / EPSILON 0.9570828000000198 / ACTION 2 / REWARD 0 / Q_MAX 2.036205e+12
Tally is -70
TIMESTEP 22589 / EPSILON 0.9570809000000198 / ACTION 0 / REWARD 0 / Q_MAX 1.566851e+12
Tally is -70
TIMESTEP 22590 / EPSILON 0.9570790000000198 / ACTION 0 / REWARD 0 / Q_MAX 1.265208e+12
Tally is -70
TIMESTEP 22591 / EPSILON 0.9570771000000198 / ACTION 1 / REWARD 0 / Q_MAX 1.653348e+12
Tally is -70
TIMESTEP 22592 / EPSILON 0.9570752000000198 / ACTION 0 / REWARD 0 / Q_MAX 2.304060e+12
Tally is -70
TIMESTEP 22593 / EPSILON 0.9570733000000198 / ACTION 2 / REWARD 0 / Q_MAX 2.769942e+12
Tally is -70
TIMESTEP 22594 / EPSILON 0.9570714000000198 / ACTION 0 / REWARD 0 / Q_MAX 2.190959e+12


Tally is -70
TIMESTEP 22676 / EPSILON 0.9569156000000199 / ACTION 2 / REWARD 0 / Q_MAX -6.726149e+11
Tally is -70
TIMESTEP 22677 / EPSILON 0.9569137000000199 / ACTION 2 / REWARD 0 / Q_MAX 2.488883e+11
Tally is -70
TIMESTEP 22678 / EPSILON 0.9569118000000199 / ACTION 2 / REWARD 0 / Q_MAX -4.929883e+10
Tally is -70
TIMESTEP 22679 / EPSILON 0.9569099000000199 / ACTION 1 / REWARD 0 / Q_MAX 1.978080e+11
Tally is -70
TIMESTEP 22680 / EPSILON 0.9569080000000199 / ACTION 1 / REWARD 0 / Q_MAX -7.351970e+11
Tally is -70
TIMESTEP 22681 / EPSILON 0.9569061000000199 / ACTION 0 / REWARD 0 / Q_MAX 6.903683e+11
Tally is -70
TIMESTEP 22682 / EPSILON 0.9569042000000199 / ACTION 0 / REWARD 0 / Q_MAX 1.268659e+12
Tally is -70
TIMESTEP 22683 / EPSILON 0.9569023000000199 / ACTION 1 / REWARD 0 / Q_MAX 2.312975e+12
Tally is -70
TIMESTEP 22684 / EPSILON 0.9569004000000199 / ACTION 1 / REWARD 0 / Q_MAX 2.186369e+12
Tally is -71
TIMESTEP 22685 / EPSILON 0.9568985000000199 / ACTION 1 / REWARD -1 / Q_MAX 2.027298e

TIMESTEP 22766 / EPSILON 0.9567446000000199 / ACTION 1 / REWARD 0 / Q_MAX 1.664354e+11
Tally is -71
TIMESTEP 22767 / EPSILON 0.9567427000000199 / ACTION 2 / REWARD 0 / Q_MAX 6.989057e+10
Tally is -71
TIMESTEP 22768 / EPSILON 0.9567408000000199 / ACTION 1 / REWARD 0 / Q_MAX -4.514130e+10
Tally is -71
TIMESTEP 22769 / EPSILON 0.9567389000000199 / ACTION 1 / REWARD 0 / Q_MAX 6.297386e+11
Tally is -71
TIMESTEP 22770 / EPSILON 0.9567370000000199 / ACTION 2 / REWARD 0 / Q_MAX 7.696739e+11
Tally is -71
TIMESTEP 22771 / EPSILON 0.9567351000000199 / ACTION 0 / REWARD 0 / Q_MAX 8.377793e+11
Tally is -71
TIMESTEP 22772 / EPSILON 0.9567332000000199 / ACTION 1 / REWARD 0 / Q_MAX 2.861279e+12
Tally is -71
TIMESTEP 22773 / EPSILON 0.9567313000000199 / ACTION 0 / REWARD 0 / Q_MAX 1.606089e+12
Tally is -71
TIMESTEP 22774 / EPSILON 0.9567294000000199 / ACTION 0 / REWARD 0 / Q_MAX 1.523382e+12
Tally is -71
TIMESTEP 22775 / EPSILON 0.9567275000000199 / ACTION 1 / REWARD 0 / Q_MAX 1.497095e+12
Tally is -71

Tally is -71
TIMESTEP 22850 / EPSILON 0.95658500000002 / ACTION 0 / REWARD 0 / Q_MAX 2.053026e+12
Tally is -71
TIMESTEP 22851 / EPSILON 0.95658310000002 / ACTION 2 / REWARD 0 / Q_MAX 1.676780e+12
Tally is -71
TIMESTEP 22852 / EPSILON 0.95658120000002 / ACTION 0 / REWARD 0 / Q_MAX -8.106173e+11
Tally is -71
TIMESTEP 22853 / EPSILON 0.95657930000002 / ACTION 1 / REWARD 0 / Q_MAX 1.020128e+12
Tally is -71
TIMESTEP 22854 / EPSILON 0.95657740000002 / ACTION 1 / REWARD 0 / Q_MAX 1.378187e+12
Tally is -71
TIMESTEP 22855 / EPSILON 0.95657550000002 / ACTION 0 / REWARD 0 / Q_MAX 2.488302e+12
Tally is -71
TIMESTEP 22856 / EPSILON 0.95657360000002 / ACTION 1 / REWARD 0 / Q_MAX 1.846040e+12
Tally is -71
TIMESTEP 22857 / EPSILON 0.95657170000002 / ACTION 2 / REWARD 0 / Q_MAX 7.738649e+11
Tally is -71
TIMESTEP 22858 / EPSILON 0.95656980000002 / ACTION 1 / REWARD 0 / Q_MAX 1.326293e+12
Tally is -71
TIMESTEP 22859 / EPSILON 0.95656790000002 / ACTION 1 / REWARD 0 / Q_MAX 5.479213e+11
Tally is -71
TIMEST

Tally is -71
TIMESTEP 22960 / EPSILON 0.9563760000000201 / ACTION 2 / REWARD 0 / Q_MAX 6.125386e+11
Tally is -71
TIMESTEP 22961 / EPSILON 0.9563741000000201 / ACTION 0 / REWARD 0 / Q_MAX 5.016125e+11
Tally is -71
TIMESTEP 22962 / EPSILON 0.9563722000000201 / ACTION 1 / REWARD 0 / Q_MAX 6.482672e+11
Tally is -71
TIMESTEP 22963 / EPSILON 0.9563703000000201 / ACTION 1 / REWARD 0 / Q_MAX -1.237860e+10
Tally is -71
TIMESTEP 22964 / EPSILON 0.9563684000000201 / ACTION 1 / REWARD 0 / Q_MAX 4.764472e+10
Tally is -71
TIMESTEP 22965 / EPSILON 0.9563665000000201 / ACTION 1 / REWARD 0 / Q_MAX 1.308029e+12
Tally is -71
TIMESTEP 22966 / EPSILON 0.9563646000000201 / ACTION 1 / REWARD 0 / Q_MAX 1.964113e+11
Tally is -71
TIMESTEP 22967 / EPSILON 0.9563627000000201 / ACTION 2 / REWARD 0 / Q_MAX 3.211924e+11
Tally is -71
TIMESTEP 22968 / EPSILON 0.9563608000000201 / ACTION 0 / REWARD 0 / Q_MAX 3.976668e+11
Tally is -71
TIMESTEP 22969 / EPSILON 0.9563589000000201 / ACTION 1 / REWARD 0 / Q_MAX 9.254738e+11

TIMESTEP 23065 / EPSILON 0.9561765000000202 / ACTION 1 / REWARD 0 / Q_MAX 6.607761e+11
Tally is -71
TIMESTEP 23066 / EPSILON 0.9561746000000202 / ACTION 2 / REWARD 0 / Q_MAX 1.245033e+12
Tally is -71
TIMESTEP 23067 / EPSILON 0.9561727000000202 / ACTION 2 / REWARD 0 / Q_MAX 1.410252e+12
Tally is -71
TIMESTEP 23068 / EPSILON 0.9561708000000202 / ACTION 0 / REWARD 0 / Q_MAX 2.175257e+11
Tally is -71
TIMESTEP 23069 / EPSILON 0.9561689000000202 / ACTION 0 / REWARD 0 / Q_MAX 4.949418e+11
Tally is -71
TIMESTEP 23070 / EPSILON 0.9561670000000202 / ACTION 1 / REWARD 0 / Q_MAX -4.400695e+11
Tally is -71
TIMESTEP 23071 / EPSILON 0.9561651000000202 / ACTION 1 / REWARD 0 / Q_MAX 1.853941e+12
Tally is -71
TIMESTEP 23072 / EPSILON 0.9561632000000202 / ACTION 2 / REWARD 0 / Q_MAX 9.624667e+11
Tally is -71
TIMESTEP 23073 / EPSILON 0.9561613000000202 / ACTION 1 / REWARD 0 / Q_MAX 1.052201e+12
Tally is -71
TIMESTEP 23074 / EPSILON 0.9561594000000202 / ACTION 1 / REWARD 0 / Q_MAX 5.497917e+11
Tally is -71

TIMESTEP 23160 / EPSILON 0.9559960000000203 / ACTION 0 / REWARD 0 / Q_MAX 6.530470e+11
Tally is -71
TIMESTEP 23161 / EPSILON 0.9559941000000203 / ACTION 0 / REWARD 0 / Q_MAX 1.165177e+12
Tally is -71
TIMESTEP 23162 / EPSILON 0.9559922000000203 / ACTION 1 / REWARD 0 / Q_MAX 3.095418e+11
Tally is -71
TIMESTEP 23163 / EPSILON 0.9559903000000203 / ACTION 1 / REWARD 0 / Q_MAX 7.996950e+10
Tally is -71
TIMESTEP 23164 / EPSILON 0.9559884000000203 / ACTION 1 / REWARD 0 / Q_MAX 4.518902e+10
Tally is -71
TIMESTEP 23165 / EPSILON 0.9559865000000203 / ACTION 2 / REWARD 0 / Q_MAX 5.713988e+11
Tally is -71
TIMESTEP 23166 / EPSILON 0.9559846000000203 / ACTION 2 / REWARD 0 / Q_MAX 4.291914e+11
Tally is -71
TIMESTEP 23167 / EPSILON 0.9559827000000203 / ACTION 0 / REWARD 0 / Q_MAX 1.761387e+11
Tally is -71
TIMESTEP 23168 / EPSILON 0.9559808000000203 / ACTION 0 / REWARD 0 / Q_MAX 2.388497e+11
Tally is -71
TIMESTEP 23169 / EPSILON 0.9559789000000203 / ACTION 2 / REWARD 0 / Q_MAX 1.384024e+11
Tally is -71


TIMESTEP 23269 / EPSILON 0.9557889000000204 / ACTION 0 / REWARD 0 / Q_MAX 2.032504e+12
Tally is -72
TIMESTEP 23270 / EPSILON 0.9557870000000204 / ACTION 1 / REWARD 0 / Q_MAX 3.870317e+12
Tally is -72
TIMESTEP 23271 / EPSILON 0.9557851000000204 / ACTION 2 / REWARD 0 / Q_MAX 1.135371e+12
Tally is -72
TIMESTEP 23272 / EPSILON 0.9557832000000204 / ACTION 2 / REWARD 0 / Q_MAX 4.241974e+11
Tally is -72
TIMESTEP 23273 / EPSILON 0.9557813000000204 / ACTION 0 / REWARD 0 / Q_MAX 9.296794e+11
Tally is -72
TIMESTEP 23274 / EPSILON 0.9557794000000204 / ACTION 1 / REWARD 0 / Q_MAX 1.290334e+12
Tally is -72
TIMESTEP 23275 / EPSILON 0.9557775000000204 / ACTION 1 / REWARD 0 / Q_MAX 3.611116e+11
Tally is -72
TIMESTEP 23276 / EPSILON 0.9557756000000204 / ACTION 0 / REWARD 0 / Q_MAX 4.183329e+11
Tally is -72
TIMESTEP 23277 / EPSILON 0.9557737000000204 / ACTION 2 / REWARD 0 / Q_MAX -3.305515e+11
Tally is -72
TIMESTEP 23278 / EPSILON 0.9557718000000204 / ACTION 1 / REWARD 0 / Q_MAX -7.794875e+11
Tally is -7

Tally is -72
TIMESTEP 23362 / EPSILON 0.9556122000000205 / ACTION 2 / REWARD 0 / Q_MAX 2.065452e+12
Tally is -72
TIMESTEP 23363 / EPSILON 0.9556103000000205 / ACTION 1 / REWARD 0 / Q_MAX 1.424537e+12
Tally is -72
TIMESTEP 23364 / EPSILON 0.9556084000000205 / ACTION 0 / REWARD 0 / Q_MAX 1.588125e+12
Tally is -72
TIMESTEP 23365 / EPSILON 0.9556065000000205 / ACTION 1 / REWARD 0 / Q_MAX 9.819368e+11
Tally is -72
TIMESTEP 23366 / EPSILON 0.9556046000000205 / ACTION 2 / REWARD 0 / Q_MAX 1.704612e+12
Tally is -72
TIMESTEP 23367 / EPSILON 0.9556027000000205 / ACTION 2 / REWARD 0 / Q_MAX 1.759777e+12
Tally is -72
TIMESTEP 23368 / EPSILON 0.9556008000000205 / ACTION 1 / REWARD 0 / Q_MAX 2.702631e+12
Tally is -72
TIMESTEP 23369 / EPSILON 0.9555989000000205 / ACTION 2 / REWARD 0 / Q_MAX 2.314308e+12
Tally is -72
TIMESTEP 23370 / EPSILON 0.9555970000000205 / ACTION 2 / REWARD 0 / Q_MAX 2.577606e+12
Tally is -72
TIMESTEP 23371 / EPSILON 0.9555951000000205 / ACTION 1 / REWARD 0 / Q_MAX 3.920580e+12


Tally is -73
TIMESTEP 23462 / EPSILON 0.9554222000000205 / ACTION 1 / REWARD 0 / Q_MAX 2.369620e+12
Tally is -73
TIMESTEP 23463 / EPSILON 0.9554203000000205 / ACTION 0 / REWARD 0 / Q_MAX 2.731727e+12
Tally is -73
TIMESTEP 23464 / EPSILON 0.9554184000000205 / ACTION 2 / REWARD 0 / Q_MAX 2.178220e+12
Tally is -73
TIMESTEP 23465 / EPSILON 0.9554165000000205 / ACTION 0 / REWARD 0 / Q_MAX 2.422440e+12
Tally is -73
TIMESTEP 23466 / EPSILON 0.9554146000000205 / ACTION 1 / REWARD 0 / Q_MAX 2.178169e+12
Tally is -73
TIMESTEP 23467 / EPSILON 0.9554127000000205 / ACTION 0 / REWARD 0 / Q_MAX 8.134791e+11
Tally is -73
TIMESTEP 23468 / EPSILON 0.9554108000000205 / ACTION 2 / REWARD 0 / Q_MAX 7.887298e+11
Tally is -73
TIMESTEP 23469 / EPSILON 0.9554089000000205 / ACTION 0 / REWARD 0 / Q_MAX 8.815851e+11
Tally is -73
TIMESTEP 23470 / EPSILON 0.9554070000000205 / ACTION 1 / REWARD 0 / Q_MAX 3.560462e+12
Tally is -73
TIMESTEP 23471 / EPSILON 0.9554051000000205 / ACTION 2 / REWARD 0 / Q_MAX 4.008401e+12


Tally is -73
TIMESTEP 23560 / EPSILON 0.9552360000000206 / ACTION 1 / REWARD 0 / Q_MAX 1.306791e+12
Tally is -73
TIMESTEP 23561 / EPSILON 0.9552341000000206 / ACTION 2 / REWARD 0 / Q_MAX 1.122673e+12
Tally is -73
TIMESTEP 23562 / EPSILON 0.9552322000000206 / ACTION 1 / REWARD 0 / Q_MAX 8.958318e+11
Tally is -73
TIMESTEP 23563 / EPSILON 0.9552303000000206 / ACTION 0 / REWARD 0 / Q_MAX 1.080822e+12
Tally is -73
TIMESTEP 23564 / EPSILON 0.9552284000000206 / ACTION 0 / REWARD 0 / Q_MAX 1.219619e+12
Tally is -73
TIMESTEP 23565 / EPSILON 0.9552265000000206 / ACTION 1 / REWARD 0 / Q_MAX 2.609031e+11
Tally is -73
TIMESTEP 23566 / EPSILON 0.9552246000000206 / ACTION 0 / REWARD 0 / Q_MAX -2.910587e+11
Tally is -73
TIMESTEP 23567 / EPSILON 0.9552227000000206 / ACTION 2 / REWARD 0 / Q_MAX -2.399515e+10
Tally is -73
TIMESTEP 23568 / EPSILON 0.9552208000000206 / ACTION 2 / REWARD 0 / Q_MAX 8.336913e+11
Tally is -73
TIMESTEP 23569 / EPSILON 0.9552189000000206 / ACTION 1 / REWARD 0 / Q_MAX 3.906502e+1

TIMESTEP 23658 / EPSILON 0.9550498000000207 / ACTION 0 / REWARD 0 / Q_MAX 6.153456e+12
Tally is -73
TIMESTEP 23659 / EPSILON 0.9550479000000207 / ACTION 2 / REWARD 0 / Q_MAX 5.463896e+12
Tally is -73
TIMESTEP 23660 / EPSILON 0.9550460000000207 / ACTION 0 / REWARD 0 / Q_MAX 5.874049e+12
Tally is -73
TIMESTEP 23661 / EPSILON 0.9550441000000207 / ACTION 2 / REWARD 0 / Q_MAX 3.912807e+12
Tally is -73
TIMESTEP 23662 / EPSILON 0.9550422000000207 / ACTION 2 / REWARD 0 / Q_MAX 2.455164e+12
Tally is -73
TIMESTEP 23663 / EPSILON 0.9550403000000207 / ACTION 0 / REWARD 0 / Q_MAX 2.863596e+12
Tally is -73
TIMESTEP 23664 / EPSILON 0.9550384000000207 / ACTION 2 / REWARD 0 / Q_MAX 1.596847e+12
Tally is -73
TIMESTEP 23665 / EPSILON 0.9550365000000207 / ACTION 1 / REWARD 0 / Q_MAX 1.966284e+12
Tally is -73
TIMESTEP 23666 / EPSILON 0.9550346000000207 / ACTION 1 / REWARD 0 / Q_MAX 1.668461e+12
Tally is -73
TIMESTEP 23667 / EPSILON 0.9550327000000207 / ACTION 0 / REWARD 0 / Q_MAX 3.827844e+12
Tally is -73


KeyboardInterrupt: 