In [6]:
import os
import random
import sys
import time
from collections import deque

import tensorflow as tf
import cv2
import numpy as np

import pong_fun as game

In [7]:
# Small constant added under the square root in squash() so that a zero-norm
# vector does not cause a division by zero.
epsilon = 1e-9
# Number of routing iterations run by routing(). With a value of 1 only the
# final-iteration branch executes, so the routing logits are never updated.
iter_routing = 1
# trainNetwork() takes a gradient step only on timesteps where t % train_freq == 0.
train_freq = 20

In [8]:
def squash(vector):
    """Apply the capsule "squash" non-linearity along the second-to-last axis.

    Each vector (taken along axis -2) is rescaled by
    ``|v|^2 / (1 + |v|^2) / sqrt(|v|^2 + epsilon)``, which leaves its direction
    unchanged and maps its length into [0, 1).

    Args:
        vector: tensor whose axis -2 holds the capsule vector components.

    Returns:
        A tensor of the same shape as `vector`.
    """
    sq_norm = reduce_sum(tf.square(vector), -2, keepdims=True)
    shrink = sq_norm / (1 + sq_norm)
    # `epsilon` keeps the square root strictly positive for all-zero vectors.
    length = tf.sqrt(sq_norm + epsilon)
    return (shrink / length) * vector  # element-wise, broadcast over axis -2
def routing(input, b_IJ):
    """Route 1152 primary capsules to ten 16-D output capsules.

    Creates the transformation weights ('Weight') and biases ('bias') as TF
    variables, computes the prediction vectors u_hat, then runs `iter_routing`
    rounds of dynamic routing. Only the last round uses the differentiable
    u_hat; earlier rounds use a gradient-stopped copy.

    Args:
        input: primary-capsule tensor; it is tiled 160 times along axis 2 so it
            broadcasts against the (1, 1152, 160, 8, 1) weights. The caller in
            this file passes shape (batch_size, 1152, 1, 8, 1).
        b_IJ: routing logits, soft-maxed over axis 2. The caller in this file
            feeds a (batch_size, 1152, 10, 1, 1) placeholder.

    Returns:
        v_J of the final routing iteration, the squashed output capsules
        ([batch_size, 1, 10, 16, 1] according to the shapes used below).
    """
    # Transformation weights: [1, num_caps_i, num_caps_j * len_v_j, len_u_j, 1]
    transform = tf.get_variable(
        'Weight',
        shape=(1, 1152, 160, 8, 1),
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.01),
    )
    biases = tf.get_variable('bias', shape=(1, 1, 10, 16, 1))

    # Eq.2, u_hat. Instead of tf.matmul, tile the input, multiply element-wise,
    # sum over the 8-D input axis and reshape to [batch_size, 1152, 10, 16, 1].
    tiled_input = tf.tile(input, [1, 1, 160, 1, 1])
    u_hat = tf.reshape(
        reduce_sum(transform * tiled_input, axis=3, keepdims=True),
        shape=[-1, 1152, 10, 16, 1],
    )

    # Same values as u_hat in the forward pass, but no gradient flows back.
    u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')

    final_iter = iter_routing - 1
    for r_iter in range(iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # Coupling coefficients: softmax of the logits over the 10 output capsules.
            c_IJ = softmax(b_IJ, axis=2)

            # Only the final iteration is allowed to back-propagate into u_hat.
            is_final = r_iter == final_iter
            predictions = u_hat if is_final else u_hat_stopped

            # Weighted sum over the 1152 input capsules, plus bias, then squash (Eq.1).
            s_J = reduce_sum(tf.multiply(c_IJ, predictions), axis=1, keepdims=True) + biases
            v_J = squash(s_J)

            if not is_final:
                # Agreement: dot product of every prediction with its output
                # capsule, i.e. tile v_J to [batch_size, 1152, 10, 16, 1],
                # multiply and sum over the 16-D axis.
                v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])
                u_produce_v = reduce_sum(u_hat_stopped * v_J_tiled, axis=3, keepdims=True)
                b_IJ = b_IJ + u_produce_v

    return v_J
# For version compatibility
def reduce_sum(input_tensor, axis=None, keepdims=False):
    """Sum `input_tensor` over `axis`, tolerating both keep-dims spellings.

    Tries `tf.reduce_sum(..., keepdims=...)` first and falls back to the
    `keep_dims=` keyword when the installed TensorFlow rejects `keepdims`.

    Args:
        input_tensor: tensor to reduce.
        axis: axis (or axes) to sum over; None sums over all of them.
        keepdims: whether to keep the reduced axes with length 1.

    Returns:
        Whatever `tf.reduce_sum` returns for these arguments.
    """
    try:
        return tf.reduce_sum(input_tensor, axis=axis, keepdims=keepdims)
    except TypeError:
        # An unknown keyword argument surfaces as TypeError. Catching only that
        # lets every other failure (bad axis, interrupts, ...) propagate
        # instead of being hidden behind the fallback call.
        return tf.reduce_sum(input_tensor, axis=axis, keep_dims=keepdims)
# For version compatibility
def softmax(logits, axis=None):
    """Apply softmax to `logits`, tolerating both axis keyword spellings.

    Tries `tf.nn.softmax(..., axis=...)` first and falls back to the `dim=`
    keyword when the installed TensorFlow rejects `axis`.

    Args:
        logits: tensor of unnormalised scores.
        axis: axis along which the softmax is taken.

    Returns:
        Whatever `tf.nn.softmax` returns for these arguments.
    """
    try:
        return tf.nn.softmax(logits, axis=axis)
    except TypeError:
        # An unknown keyword argument surfaces as TypeError. Catching only that
        # lets every other failure propagate instead of being hidden behind
        # the fallback call.
        return tf.nn.softmax(logits, dim=axis)

In [9]:
GAME = 'pong' # name of the game being played; used in the checkpoint file name
ACTIONS = 6 # number of valid actions (length of the one-hot action vector and of the Q-value output)
GAMMA = 0.99 # discount factor applied to the next state's maximum Q-value
OBSERVE = 1000. # timesteps of purely random play before training starts
EXPLORE = 5000. # timesteps over which epsilon is annealed from INITIAL_EPSILON to FINAL_EPSILON
FINAL_EPSILON = 0.05 # final value of epsilon
INITIAL_EPSILON = 1.0 # starting value of epsilon
REPLAY_MEMORY = 100000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
K = 1 # number of consecutive frames each selected action is applied for

In [10]:
def createNetwork():
    """Build the capsule-network Q-function graph.

    Layers, in order: an 8x8/stride-4 convolution, a 9x9/stride-2 primary
    capsule convolution reshaped into 1152 capsules of 8 components, the
    routing layer producing 10 capsules of 16 components, and two fully
    connected layers ending in one Q-value per action.

    Returns:
        (s, coeff, readout): the state placeholder of shape [None, 84, 84, 4],
        the routing-logit placeholder of shape (None, 1152, 10, 1, 1), and the
        Q-value tensor with ACTIONS outputs.
    """
    # Graph inputs: the stacked frames and the initial routing logits.
    s = tf.placeholder("float", [None, 84, 84, 4])
    coeff = tf.placeholder(tf.float32, shape=(None, 1152, 10, 1, 1))

    # Conv1 -> [batch_size, 20, 20, 256]
    kernel_init = tf.random_normal_initializer(0., 0.01)
    bias_init = tf.constant_initializer(0.01)
    w1 = tf.get_variable('w1', [8, 8, 4, 256], initializer=kernel_init)
    b1 = tf.get_variable('b1', [256], initializer=bias_init)
    pre_activation = tf.nn.bias_add(
        tf.nn.conv2d(s, w1, strides=[1, 4, 4, 1], padding="VALID"), b1)
    conv1 = tf.reshape(tf.nn.relu(pre_activation), [-1, 20, 20, 256])

    # Primary capsules: 32 channels of 8-D capsules, regrouped to
    # (batch_size, 1152, 8, 1) and squashed.
    primary = tf.contrib.layers.conv2d(
        conv1,
        32 * 8,
        kernel_size=9,
        stride=2,
        padding="VALID",
        activation_fn=tf.nn.relu,
        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
        biases_initializer=tf.constant_initializer(0))
    primary = squash(tf.reshape(primary, (-1, 1152, 8, 1)))

    # Insert a singleton axis so routing() can tile over the output capsules.
    capsule_dim = primary.shape[-2].value
    input_fc = tf.reshape(primary, shape=(-1, 1152, 1, capsule_dim, 1))
    caps2 = routing(input_fc, coeff)

    # Flatten the 10 x 16 output capsules and map them to Q-values.
    vector_j = tf.reshape(caps2, shape=(-1, 160))
    hidden = tf.contrib.layers.fully_connected(
        vector_j, num_outputs=30, activation_fn=tf.nn.relu)
    readout = tf.contrib.layers.fully_connected(
        hidden, num_outputs=ACTIONS, activation_fn=None)
    return s, coeff, readout

In [11]:
def _preprocess_frame(frame):
    """Convert a raw game frame into an 84x84 binary (0/255) grayscale image."""
    gray = cv2.cvtColor(cv2.resize(frame, (84, 84)), cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    return binary


def trainNetwork(s, coeff, readout, sess):
    """Train the Q-network by playing the game with an epsilon-greedy policy.

    Builds a squared-error loss and an Adam optimiser on top of `readout`, then
    loops: pick an action, step the emulator, store the transition in the
    replay memory and, once more than OBSERVE timesteps have passed, run one
    gradient step on a random minibatch every `train_freq` timesteps. A
    checkpoint is written under 'saved_networks/' every 10000 timesteps. The
    loop ends as soon as bar1_score - bar2_score exceeds 17.

    Args:
        s: state placeholder, fed with arrays of shape (batch, 84, 84, 4).
        coeff: routing-logit placeholder, fed with zeros of shape
            (batch, 1152, 10, 1, 1).
        readout: Q-value tensor with one output per action.
        sess: TensorFlow session used to initialise variables, evaluate the
            network, run the optimiser and save checkpoints.
    """
    tick = time.time()

    # Cost: squared error between the target y and the Q-value of the taken action.
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), axis=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)

    # open up a game state to communicate with emulator
    game_state = game.GameState()
    # Replay memory; maxlen makes the deque drop the oldest transition itself.
    D = deque(maxlen=REPLAY_MEMORY)

    # get the first state by doing nothing and preprocess the image to 84x84x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, _, terminal, bar1_score, bar2_score = game_state.frame_step(do_nothing)
    x_t = _preprocess_frame(x_t)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    # Initial routing logits: all zeros, one array per batch size that is fed.
    b_IJ1 = np.zeros((1, 1152, 10, 1, 1)).astype(np.float32)  # batch_size=1
    b_IJ2 = np.zeros((BATCH, 1152, 10, 1, 1)).astype(np.float32)  # batch_size=BATCH

    # Named explore_eps so it is not confused with the module-level `epsilon`
    # constant used by squash().
    explore_eps = INITIAL_EPSILON
    anneal_step = (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
    t = 0
    episode = 0
    while True:
        # choose an action epsilon greedily
        # `session=sess` makes the evaluation independent of whichever session
        # happens to be installed as the default.
        readout_t = readout.eval(
            feed_dict={s: s_t.reshape((1, 84, 84, 4)), coeff: b_IJ1},
            session=sess)

        a_t = np.zeros([ACTIONS])
        if random.random() <= explore_eps or t <= OBSERVE:
            action_index = random.randrange(ACTIONS)
        else:
            action_index = np.argmax(readout_t)
        a_t[action_index] = 1

        # scale down epsilon, never letting it drop below FINAL_EPSILON
        if explore_eps > FINAL_EPSILON and t > OBSERVE:
            explore_eps = max(FINAL_EPSILON, explore_eps - anneal_step)

        for _ in range(K):
            # run the selected action and observe next state and reward
            x_t1_col, r_t, terminal, bar1_score, bar2_score = game_state.frame_step(a_t)
            if terminal == 1:
                episode += 1
            x_t1 = np.reshape(_preprocess_frame(x_t1_col), (84, 84, 1))
            s_t1 = np.append(x_t1, s_t[:, :, 0:3], axis=2)

            # store the transition in D
            D.append((s_t, a_t, r_t, s_t1, terminal))

            # Advance the state per frame so that, when K > 1, each repeated
            # frame is stacked onto the previous one rather than onto the
            # state from before the loop. For K == 1 this is equivalent to
            # updating once after the loop.
            s_t = s_t1

        # only train if done observing
        if t > OBSERVE and t % train_freq == 0:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # get the batch variables
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            readout_j1_batch = readout.eval(
                feed_dict={s: s_j1_batch, coeff: b_IJ2},
                session=sess)

            # Target: the reward alone for terminal transitions, otherwise the
            # reward plus the discounted best Q-value of the next state.
            y_batch = [
                reward if done else reward + GAMMA * np.max(q_next)
                for (_, _, reward, _, done), q_next in zip(minibatch, readout_j1_batch)
            ]

            # perform gradient step
            train_step.run(
                feed_dict={y: y_batch, a: a_batch, s: s_j_batch, coeff: b_IJ2},
                session=sess)

        t += 1

        # save progress every 10000 iterations
        if t % 10000 == 0:
            # The checkpoint's parent directory has to exist before saving.
            os.makedirs('saved_networks', exist_ok=True)
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step=t)
        if r_t != 0:
            print("TIMESTEP", t, "/ EPISODE", episode, "/ bar1_score", bar1_score, "/ bar2_score", bar2_score, "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t))

        if bar1_score - bar2_score > 17:
            print("Game_Ends_in Time:", int(time.time() - tick))
            break
        if bar1_score - bar2_score > 16:
            print("Game_Mid_in Time:", int(time.time() - tick))

In [12]:
def playGame():
    """Build a fresh graph and session, then train the network.

    Resets the default graph, opens an InteractiveSession, creates the network
    with createNetwork() and hands it to trainNetwork(). The session is closed
    when training returns or raises, so its resources are released.
    """
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    try:
        s, coeff, readout = createNetwork()
        trainNetwork(s, coeff, readout, sess)
    finally:
        # Release the session even when training is interrupted or fails.
        sess.close()

In [None]:
def main():
    """Entry point: run one full training session via playGame()."""
    playGame()

if __name__ == "__main__":
    # Time the whole run (graph construction plus training) and report the
    # elapsed time in whole seconds once main() returns.
    tick = time.time()
    main()
    print("Game_Ends_in Time:",int(time.time() - tick))
    print("____________ END HERE _____________")

TIMESTEP 297 / EPISODE 0 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 5.432077e-02
TIMESTEP 343 / EPISODE 0 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 5.572042e-02
TIMESTEP 389 / EPISODE 0 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 5.557521e-02
TIMESTEP 435 / EPISODE 0 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 5.894655e-02
TIMESTEP 566 / EPISODE 0 / bar1_score 1 / bar2_score 4 / REWARD 1 / Q_MAX 5.353391e-02
TIMESTEP 695 / EPISODE 0 / bar1_score 1 / bar2_score 5 / REWARD -1 / Q_MAX 4.421923e-02
TIMESTEP 741 / EPISODE 0 / bar1_score 1 / bar2_score 6 / REWARD -1 / Q_MAX 4.946889e-02
TIMESTEP 787 / EPISODE 0 / bar1_score 1 / bar2_score 7 / REWARD -1 / Q_MAX 5.191492e-02
TIMESTEP 833 / EPISODE 0 / bar1_score 1 / bar2_score 8 / REWARD -1 / Q_MAX 4.733207e-02
TIMESTEP 879 / EPISODE 0 / bar1_score 1 / bar2_score 9 / REWARD -1 / Q_MAX 4.732193e-02
TIMESTEP 925 / EPISODE 0 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 5.138817e-02
TIMESTEP 971 / EPISODE 0 / bar1_

TIMESTEP 5868 / EPISODE 4 / bar1_score 2 / bar2_score 8 / REWARD -1 / Q_MAX 1.750634e+00
TIMESTEP 5914 / EPISODE 4 / bar1_score 2 / bar2_score 9 / REWARD -1 / Q_MAX 1.772846e+00
TIMESTEP 5960 / EPISODE 4 / bar1_score 2 / bar2_score 10 / REWARD -1 / Q_MAX 1.759824e+00
TIMESTEP 6006 / EPISODE 4 / bar1_score 2 / bar2_score 11 / REWARD -1 / Q_MAX 1.764301e+00
TIMESTEP 6052 / EPISODE 4 / bar1_score 2 / bar2_score 12 / REWARD -1 / Q_MAX 1.795268e+00
TIMESTEP 6098 / EPISODE 4 / bar1_score 2 / bar2_score 13 / REWARD -1 / Q_MAX 1.813326e+00
TIMESTEP 6144 / EPISODE 4 / bar1_score 2 / bar2_score 14 / REWARD -1 / Q_MAX 1.785871e+00
TIMESTEP 6190 / EPISODE 4 / bar1_score 2 / bar2_score 15 / REWARD -1 / Q_MAX 1.797716e+00
TIMESTEP 6236 / EPISODE 4 / bar1_score 2 / bar2_score 16 / REWARD -1 / Q_MAX 1.799374e+00
TIMESTEP 6282 / EPISODE 4 / bar1_score 2 / bar2_score 17 / REWARD -1 / Q_MAX 1.840274e+00
TIMESTEP 6328 / EPISODE 4 / bar1_score 2 / bar2_score 18 / REWARD -1 / Q_MAX 1.821532e+00
TIMESTEP 637

TIMESTEP 10100 / EPISODE 9 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 1.845489e+00
TIMESTEP 10146 / EPISODE 9 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 1.865047e+00
TIMESTEP 10192 / EPISODE 9 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 1.849055e+00
TIMESTEP 10238 / EPISODE 9 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 1.855969e+00
TIMESTEP 10284 / EPISODE 9 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 1.868932e+00
TIMESTEP 10330 / EPISODE 9 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 1.871479e+00
TIMESTEP 10376 / EPISODE 9 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 1.880121e+00
TIMESTEP 10422 / EPISODE 9 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 1.884903e+00
TIMESTEP 10468 / EPISODE 9 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 1.856364e+00
TIMESTEP 10514 / EPISODE 9 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 1.887941e+00
TIMESTEP 10560 / EPISODE 9 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX 1.889826e+00
TIMESTEP 

TIMESTEP 14410 / EPISODE 13 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX 1.773180e+00
TIMESTEP 14456 / EPISODE 13 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX 1.747119e+00
TIMESTEP 14502 / EPISODE 13 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX 1.694298e+00
TIMESTEP 14548 / EPISODE 13 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX 1.752405e+00
TIMESTEP 14594 / EPISODE 13 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX 1.684252e+00
TIMESTEP 14640 / EPISODE 13 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX 1.731348e+00
TIMESTEP 14686 / EPISODE 13 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX 1.721743e+00
TIMESTEP 14732 / EPISODE 13 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX 1.752140e+00
TIMESTEP 14778 / EPISODE 13 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX 1.695087e+00
TIMESTEP 14824 / EPISODE 13 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX 1.716999e+00
TIMESTEP 14870 / EPISODE 14 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 1.

TIMESTEP 18717 / EPISODE 18 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 7.562318e-01
TIMESTEP 18763 / EPISODE 18 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 1.022988e+00
TIMESTEP 18809 / EPISODE 18 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 7.956601e-01
TIMESTEP 18855 / EPISODE 18 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 9.642301e-01
TIMESTEP 18901 / EPISODE 18 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 6.317900e-01
TIMESTEP 18947 / EPISODE 18 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 7.259265e-01
TIMESTEP 18993 / EPISODE 18 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 5.988713e-01
TIMESTEP 19039 / EPISODE 18 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 6.056297e-01
TIMESTEP 19085 / EPISODE 18 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 5.005429e-01
TIMESTEP 19131 / EPISODE 18 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 7.166536e-01
TIMESTEP 19177 / EPISODE 18 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX 3.896469e-0

TIMESTEP 24362 / EPISODE 22 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 1.453021e-01
TIMESTEP 24408 / EPISODE 22 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 6.156236e-02
TIMESTEP 24454 / EPISODE 22 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 1.006253e-01
TIMESTEP 24500 / EPISODE 22 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 4.259358e-02
TIMESTEP 24629 / EPISODE 22 / bar1_score 1 / bar2_score 7 / REWARD 1 / Q_MAX 3.560825e-01
TIMESTEP 25010 / EPISODE 22 / bar1_score 2 / bar2_score 7 / REWARD 1 / Q_MAX 1.284367e+00
TIMESTEP 25139 / EPISODE 22 / bar1_score 2 / bar2_score 8 / REWARD -1 / Q_MAX 3.506041e-01
TIMESTEP 25185 / EPISODE 22 / bar1_score 2 / bar2_score 9 / REWARD -1 / Q_MAX 1.555950e-01
TIMESTEP 25231 / EPISODE 22 / bar1_score 2 / bar2_score 10 / REWARD -1 / Q_MAX 9.604405e-02
TIMESTEP 25277 / EPISODE 22 / bar1_score 2 / bar2_score 11 / REWARD -1 / Q_MAX 4.012000e-02
TIMESTEP 25493 / EPISODE 22 / bar1_score 2 / bar2_score 12 / REWARD -1 / Q_MAX 1.258624e-0

TIMESTEP 30007 / EPISODE 26 / bar1_score 1 / bar2_score 9 / REWARD -1 / Q_MAX 1.180973e-01
TIMESTEP 30053 / EPISODE 26 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 2.021727e-01
TIMESTEP 30099 / EPISODE 26 / bar1_score 1 / bar2_score 11 / REWARD -1 / Q_MAX 9.463099e-03
TIMESTEP 30145 / EPISODE 26 / bar1_score 1 / bar2_score 12 / REWARD -1 / Q_MAX 1.630988e-01
TIMESTEP 30191 / EPISODE 26 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX 6.209858e-03
TIMESTEP 30237 / EPISODE 26 / bar1_score 1 / bar2_score 14 / REWARD -1 / Q_MAX 1.855178e-01
TIMESTEP 30283 / EPISODE 26 / bar1_score 1 / bar2_score 15 / REWARD -1 / Q_MAX 4.314234e-02
TIMESTEP 30329 / EPISODE 26 / bar1_score 1 / bar2_score 16 / REWARD -1 / Q_MAX 2.379767e-01
TIMESTEP 30375 / EPISODE 26 / bar1_score 1 / bar2_score 17 / REWARD -1 / Q_MAX 4.286413e-02
TIMESTEP 30421 / EPISODE 26 / bar1_score 1 / bar2_score 18 / REWARD -1 / Q_MAX 2.146032e-01
TIMESTEP 30467 / EPISODE 26 / bar1_score 1 / bar2_score 19 / REWARD -1 / Q_MAX 4.

TIMESTEP 34313 / EPISODE 30 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX 3.848639e-01
TIMESTEP 34359 / EPISODE 30 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX 4.971894e-01
TIMESTEP 34405 / EPISODE 31 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 4.715449e-01
TIMESTEP 34451 / EPISODE 31 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 6.201456e-01
TIMESTEP 34667 / EPISODE 31 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 4.625811e-01
TIMESTEP 34713 / EPISODE 31 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 3.959948e-01
TIMESTEP 34759 / EPISODE 31 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 2.801125e-01
TIMESTEP 34805 / EPISODE 31 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 5.304052e-01
TIMESTEP 34851 / EPISODE 31 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 2.852614e-01
TIMESTEP 34897 / EPISODE 31 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 3.342793e-01
TIMESTEP 34943 / EPISODE 31 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 2.408130e-

TIMESTEP 40634 / EPISODE 35 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -1.142972e-01
TIMESTEP 40680 / EPISODE 35 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -7.055755e-02
TIMESTEP 40811 / EPISODE 35 / bar1_score 1 / bar2_score 5 / REWARD 1 / Q_MAX 6.055011e-01
TIMESTEP 40940 / EPISODE 35 / bar1_score 1 / bar2_score 6 / REWARD -1 / Q_MAX 1.348721e-01
TIMESTEP 40986 / EPISODE 35 / bar1_score 1 / bar2_score 7 / REWARD -1 / Q_MAX -1.502848e-01
TIMESTEP 41032 / EPISODE 35 / bar1_score 1 / bar2_score 8 / REWARD -1 / Q_MAX -8.713339e-02
TIMESTEP 41078 / EPISODE 35 / bar1_score 1 / bar2_score 9 / REWARD -1 / Q_MAX -1.188495e-02
TIMESTEP 41124 / EPISODE 35 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 3.051452e-02
TIMESTEP 41170 / EPISODE 35 / bar1_score 1 / bar2_score 11 / REWARD -1 / Q_MAX 5.843036e-02
TIMESTEP 41216 / EPISODE 35 / bar1_score 1 / bar2_score 12 / REWARD -1 / Q_MAX 1.318717e-01
TIMESTEP 41262 / EPISODE 35 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX 2.52

TIMESTEP 46029 / EPISODE 39 / bar1_score 1 / bar2_score 8 / REWARD 1 / Q_MAX 1.378677e+00
TIMESTEP 46158 / EPISODE 39 / bar1_score 1 / bar2_score 9 / REWARD -1 / Q_MAX 5.385173e-01
TIMESTEP 46204 / EPISODE 39 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 2.431911e-01
TIMESTEP 46250 / EPISODE 39 / bar1_score 1 / bar2_score 11 / REWARD -1 / Q_MAX 3.050806e-02
TIMESTEP 46296 / EPISODE 39 / bar1_score 1 / bar2_score 12 / REWARD -1 / Q_MAX 6.249023e-02
TIMESTEP 46342 / EPISODE 39 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX -4.332213e-02
TIMESTEP 46388 / EPISODE 39 / bar1_score 1 / bar2_score 14 / REWARD -1 / Q_MAX -9.935632e-02
TIMESTEP 46434 / EPISODE 39 / bar1_score 1 / bar2_score 15 / REWARD -1 / Q_MAX -1.515787e-01
TIMESTEP 46480 / EPISODE 39 / bar1_score 1 / bar2_score 16 / REWARD -1 / Q_MAX -1.229086e-01
TIMESTEP 46526 / EPISODE 39 / bar1_score 1 / bar2_score 17 / REWARD -1 / Q_MAX -1.589173e-01
TIMESTEP 46655 / EPISODE 39 / bar1_score 2 / bar2_score 17 / REWARD 1 / Q_MAX 

TIMESTEP 53258 / EPISODE 43 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 5.568653e-01
TIMESTEP 53555 / EPISODE 43 / bar1_score 1 / bar2_score 5 / REWARD 1 / Q_MAX 1.680803e+00
TIMESTEP 53684 / EPISODE 43 / bar1_score 1 / bar2_score 6 / REWARD -1 / Q_MAX 4.450262e-01
TIMESTEP 53730 / EPISODE 43 / bar1_score 1 / bar2_score 7 / REWARD -1 / Q_MAX 5.819950e-01
TIMESTEP 53776 / EPISODE 43 / bar1_score 1 / bar2_score 8 / REWARD -1 / Q_MAX 3.580932e-01
TIMESTEP 53905 / EPISODE 43 / bar1_score 2 / bar2_score 8 / REWARD 1 / Q_MAX 1.820768e+00
TIMESTEP 54034 / EPISODE 43 / bar1_score 2 / bar2_score 9 / REWARD -1 / Q_MAX 1.505799e+00
TIMESTEP 54165 / EPISODE 43 / bar1_score 3 / bar2_score 9 / REWARD 1 / Q_MAX 1.971153e+00
TIMESTEP 54294 / EPISODE 43 / bar1_score 3 / bar2_score 10 / REWARD -1 / Q_MAX 1.699378e+00
TIMESTEP 54340 / EPISODE 43 / bar1_score 3 / bar2_score 11 / REWARD -1 / Q_MAX 1.254316e+00
TIMESTEP 54556 / EPISODE 43 / bar1_score 3 / bar2_score 12 / REWARD -1 / Q_MAX 1.252304e+00

TIMESTEP 59658 / EPISODE 47 / bar1_score 1 / bar2_score 9 / REWARD 1 / Q_MAX 2.440398e+00
TIMESTEP 59955 / EPISODE 47 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 2.204106e+00
TIMESTEP 60001 / EPISODE 47 / bar1_score 1 / bar2_score 11 / REWARD -1 / Q_MAX 1.812380e+00
TIMESTEP 60047 / EPISODE 47 / bar1_score 1 / bar2_score 12 / REWARD -1 / Q_MAX 2.288057e+00
TIMESTEP 60093 / EPISODE 47 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX 1.913383e+00
TIMESTEP 60139 / EPISODE 47 / bar1_score 1 / bar2_score 14 / REWARD -1 / Q_MAX 2.010189e+00
TIMESTEP 60185 / EPISODE 47 / bar1_score 1 / bar2_score 15 / REWARD -1 / Q_MAX 1.965687e+00
TIMESTEP 60231 / EPISODE 47 / bar1_score 1 / bar2_score 16 / REWARD -1 / Q_MAX 2.269712e+00
TIMESTEP 60277 / EPISODE 47 / bar1_score 1 / bar2_score 17 / REWARD -1 / Q_MAX 1.977503e+00
TIMESTEP 60323 / EPISODE 47 / bar1_score 1 / bar2_score 18 / REWARD -1 / Q_MAX 2.389852e+00
TIMESTEP 60369 / EPISODE 47 / bar1_score 1 / bar2_score 19 / REWARD -1 / Q_MAX 1.9

TIMESTEP 67903 / EPISODE 51 / bar1_score 1 / bar2_score 8 / REWARD -1 / Q_MAX 1.031041e+00
TIMESTEP 67949 / EPISODE 51 / bar1_score 1 / bar2_score 9 / REWARD -1 / Q_MAX 1.005658e+00
TIMESTEP 68080 / EPISODE 51 / bar1_score 2 / bar2_score 9 / REWARD 1 / Q_MAX 1.868395e+00
TIMESTEP 68209 / EPISODE 51 / bar1_score 2 / bar2_score 10 / REWARD -1 / Q_MAX 1.163726e+00
TIMESTEP 68255 / EPISODE 51 / bar1_score 2 / bar2_score 11 / REWARD -1 / Q_MAX 7.551835e-01
TIMESTEP 68301 / EPISODE 51 / bar1_score 2 / bar2_score 12 / REWARD -1 / Q_MAX 6.316646e-01
TIMESTEP 68347 / EPISODE 51 / bar1_score 2 / bar2_score 13 / REWARD -1 / Q_MAX 4.535344e-01
TIMESTEP 68560 / EPISODE 51 / bar1_score 2 / bar2_score 14 / REWARD -1 / Q_MAX 4.420430e-02
TIMESTEP 68691 / EPISODE 51 / bar1_score 3 / bar2_score 14 / REWARD 1 / Q_MAX 9.654494e-01
TIMESTEP 68989 / EPISODE 51 / bar1_score 3 / bar2_score 15 / REWARD -1 / Q_MAX 5.133685e-01
TIMESTEP 69035 / EPISODE 51 / bar1_score 3 / bar2_score 16 / REWARD -1 / Q_MAX -5.529

TIMESTEP 76064 / EPISODE 55 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 1.477939e+00
TIMESTEP 76110 / EPISODE 55 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 1.493278e+00
TIMESTEP 76156 / EPISODE 55 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 1.471877e+00
TIMESTEP 76202 / EPISODE 55 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 1.284216e+00
TIMESTEP 76248 / EPISODE 55 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 1.219297e+00
TIMESTEP 76294 / EPISODE 55 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 1.220341e+00
TIMESTEP 76340 / EPISODE 55 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 1.131372e+00
TIMESTEP 76469 / EPISODE 55 / bar1_score 1 / bar2_score 9 / REWARD 1 / Q_MAX 2.102273e+00
TIMESTEP 76850 / EPISODE 55 / bar1_score 2 / bar2_score 9 / REWARD 1 / Q_MAX 1.717816e+00
TIMESTEP 76979 / EPISODE 55 / bar1_score 2 / bar2_score 10 / REWARD -1 / Q_MAX 5.481012e-01
TIMESTEP 77025 / EPISODE 55 / bar1_score 2 / bar2_score 11 / REWARD -1 / Q_MAX 3.604468e-01

TIMESTEP 83792 / EPISODE 58 / bar1_score 4 / bar2_score 19 / REWARD 1 / Q_MAX 1.422203e+00
TIMESTEP 83921 / EPISODE 59 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 1.068823e-01
TIMESTEP 84050 / EPISODE 59 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 1.428907e+00
TIMESTEP 84179 / EPISODE 59 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX 9.195888e-02
TIMESTEP 84225 / EPISODE 59 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_MAX -1.200066e-01
TIMESTEP 84271 / EPISODE 59 / bar1_score 1 / bar2_score 3 / REWARD -1 / Q_MAX -2.371202e-02
TIMESTEP 84400 / EPISODE 59 / bar1_score 2 / bar2_score 3 / REWARD 1 / Q_MAX 1.555145e+00
TIMESTEP 84529 / EPISODE 59 / bar1_score 2 / bar2_score 4 / REWARD -1 / Q_MAX 4.631709e-02
TIMESTEP 84575 / EPISODE 59 / bar1_score 2 / bar2_score 5 / REWARD -1 / Q_MAX 6.331070e-02
TIMESTEP 84788 / EPISODE 59 / bar1_score 2 / bar2_score 6 / REWARD -1 / Q_MAX 4.715785e-01
TIMESTEP 84834 / EPISODE 59 / bar1_score 2 / bar2_score 7 / REWARD -1 / Q_MAX 5.623037e-01

TIMESTEP 91777 / EPISODE 62 / bar1_score 6 / bar2_score 17 / REWARD 1 / Q_MAX 2.352682e+00
TIMESTEP 91906 / EPISODE 62 / bar1_score 6 / bar2_score 18 / REWARD -1 / Q_MAX 1.865490e+00
TIMESTEP 91952 / EPISODE 62 / bar1_score 6 / bar2_score 19 / REWARD -1 / Q_MAX 1.370791e+00
TIMESTEP 91998 / EPISODE 63 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 1.015761e+00
TIMESTEP 92044 / EPISODE 63 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 1.296107e+00
TIMESTEP 92342 / EPISODE 63 / bar1_score 1 / bar2_score 1 / REWARD 1 / Q_MAX 2.006364e+00
TIMESTEP 92471 / EPISODE 63 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_MAX 5.132386e-01
TIMESTEP 92517 / EPISODE 63 / bar1_score 1 / bar2_score 3 / REWARD -1 / Q_MAX 8.498440e-01
TIMESTEP 92563 / EPISODE 63 / bar1_score 1 / bar2_score 4 / REWARD -1 / Q_MAX 7.172071e-01
TIMESTEP 92609 / EPISODE 63 / bar1_score 1 / bar2_score 5 / REWARD -1 / Q_MAX 8.296441e-01
TIMESTEP 92655 / EPISODE 63 / bar1_score 1 / bar2_score 6 / REWARD -1 / Q_MAX 8.552026e-0

TIMESTEP 99507 / EPISODE 66 / bar1_score 3 / bar2_score 14 / REWARD -1 / Q_MAX 3.183298e-02
TIMESTEP 99553 / EPISODE 66 / bar1_score 3 / bar2_score 15 / REWARD -1 / Q_MAX -3.839172e-02
TIMESTEP 99851 / EPISODE 66 / bar1_score 4 / bar2_score 15 / REWARD 1 / Q_MAX 1.397203e+00
TIMESTEP 99980 / EPISODE 66 / bar1_score 4 / bar2_score 16 / REWARD -1 / Q_MAX 2.849456e-01
TIMESTEP 100109 / EPISODE 66 / bar1_score 5 / bar2_score 16 / REWARD 1 / Q_MAX 1.671831e+00
TIMESTEP 100238 / EPISODE 66 / bar1_score 5 / bar2_score 17 / REWARD -1 / Q_MAX 9.482387e-02
TIMESTEP 100284 / EPISODE 66 / bar1_score 5 / bar2_score 18 / REWARD -1 / Q_MAX 3.699630e-03
TIMESTEP 100330 / EPISODE 66 / bar1_score 5 / bar2_score 19 / REWARD -1 / Q_MAX -5.517529e-03
TIMESTEP 100461 / EPISODE 66 / bar1_score 6 / bar2_score 19 / REWARD 1 / Q_MAX 1.461918e+00
TIMESTEP 100590 / EPISODE 67 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -6.159531e-02
TIMESTEP 100719 / EPISODE 67 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_M

TIMESTEP 108620 / EPISODE 70 / bar1_score 2 / bar2_score 4 / REWARD -1 / Q_MAX 1.156627e+00
TIMESTEP 108666 / EPISODE 70 / bar1_score 2 / bar2_score 5 / REWARD -1 / Q_MAX 1.479694e+00
TIMESTEP 108795 / EPISODE 70 / bar1_score 3 / bar2_score 5 / REWARD 1 / Q_MAX 2.625345e+00
TIMESTEP 108924 / EPISODE 70 / bar1_score 3 / bar2_score 6 / REWARD -1 / Q_MAX 1.251756e+00
TIMESTEP 108970 / EPISODE 70 / bar1_score 3 / bar2_score 7 / REWARD -1 / Q_MAX 1.093047e+00
TIMESTEP 109016 / EPISODE 70 / bar1_score 3 / bar2_score 8 / REWARD -1 / Q_MAX 1.342420e+00
TIMESTEP 109148 / EPISODE 70 / bar1_score 4 / bar2_score 8 / REWARD 1 / Q_MAX 2.552212e+00
TIMESTEP 109277 / EPISODE 70 / bar1_score 4 / bar2_score 9 / REWARD -1 / Q_MAX 1.245205e+00
TIMESTEP 109406 / EPISODE 70 / bar1_score 5 / bar2_score 9 / REWARD 1 / Q_MAX 2.500408e+00
TIMESTEP 109535 / EPISODE 70 / bar1_score 5 / bar2_score 10 / REWARD -1 / Q_MAX 1.072595e+00
TIMESTEP 109664 / EPISODE 70 / bar1_score 6 / bar2_score 10 / REWARD 1 / Q_MAX 2.5

TIMESTEP 117471 / EPISODE 73 / bar1_score 1 / bar2_score 14 / REWARD 1 / Q_MAX 2.182863e+00
TIMESTEP 117600 / EPISODE 73 / bar1_score 1 / bar2_score 15 / REWARD -1 / Q_MAX 1.371404e-01
TIMESTEP 117729 / EPISODE 73 / bar1_score 2 / bar2_score 15 / REWARD 1 / Q_MAX 2.445343e+00
TIMESTEP 117858 / EPISODE 73 / bar1_score 2 / bar2_score 16 / REWARD -1 / Q_MAX 4.427068e-01
TIMESTEP 117904 / EPISODE 73 / bar1_score 2 / bar2_score 17 / REWARD -1 / Q_MAX 1.936118e-01
TIMESTEP 117950 / EPISODE 73 / bar1_score 2 / bar2_score 18 / REWARD -1 / Q_MAX 3.931002e-01
TIMESTEP 117996 / EPISODE 73 / bar1_score 2 / bar2_score 19 / REWARD -1 / Q_MAX 3.535533e-01
TIMESTEP 118042 / EPISODE 74 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 7.953932e-02
TIMESTEP 118171 / EPISODE 74 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 2.571423e+00
TIMESTEP 118300 / EPISODE 74 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX 7.403429e-01
TIMESTEP 118346 / EPISODE 74 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_M

TIMESTEP 127913 / EPISODE 76 / bar1_score 9 / bar2_score 18 / REWARD 1 / Q_MAX 2.699259e+00
TIMESTEP 128042 / EPISODE 76 / bar1_score 9 / bar2_score 19 / REWARD -1 / Q_MAX 4.516152e-01
TIMESTEP 128171 / EPISODE 76 / bar1_score 10 / bar2_score 19 / REWARD 1 / Q_MAX 2.651962e+00
TIMESTEP 128300 / EPISODE 77 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 1.013028e+00
TIMESTEP 128429 / EPISODE 77 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 2.737438e+00
TIMESTEP 128558 / EPISODE 77 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX 8.220792e-01
TIMESTEP 128604 / EPISODE 77 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_MAX 5.539276e-01
TIMESTEP 128650 / EPISODE 77 / bar1_score 1 / bar2_score 3 / REWARD -1 / Q_MAX 6.905426e-01
TIMESTEP 128696 / EPISODE 77 / bar1_score 1 / bar2_score 4 / REWARD -1 / Q_MAX 4.539474e-01
TIMESTEP 128742 / EPISODE 77 / bar1_score 1 / bar2_score 5 / REWARD -1 / Q_MAX 6.302423e-01
TIMESTEP 128872 / EPISODE 77 / bar1_score 2 / bar2_score 5 / REWARD 1 / Q_MAX 2

TIMESTEP 138763 / EPISODE 80 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 5.986143e-02
TIMESTEP 138809 / EPISODE 80 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 5.529682e-02
TIMESTEP 138855 / EPISODE 80 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 3.183792e-01
TIMESTEP 138901 / EPISODE 80 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 2.095414e-01
TIMESTEP 138947 / EPISODE 80 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 3.757897e-01
TIMESTEP 139078 / EPISODE 80 / bar1_score 1 / bar2_score 4 / REWARD 1 / Q_MAX 2.789571e+00
TIMESTEP 139207 / EPISODE 80 / bar1_score 1 / bar2_score 5 / REWARD -1 / Q_MAX 9.746968e-01
TIMESTEP 139253 / EPISODE 80 / bar1_score 1 / bar2_score 6 / REWARD -1 / Q_MAX 1.010511e+00
TIMESTEP 139299 / EPISODE 80 / bar1_score 1 / bar2_score 7 / REWARD -1 / Q_MAX 9.387559e-01
TIMESTEP 139428 / EPISODE 80 / bar1_score 2 / bar2_score 7 / REWARD 1 / Q_MAX 2.947161e+00
TIMESTEP 139810 / EPISODE 80 / bar1_score 3 / bar2_score 7 / REWARD 1 / Q_MAX 2.58

TIMESTEP 151384 / EPISODE 82 / bar1_score 8 / bar2_score 11 / REWARD -1 / Q_MAX 1.279679e+00
TIMESTEP 151515 / EPISODE 82 / bar1_score 9 / bar2_score 11 / REWARD 1 / Q_MAX 2.706026e+00
TIMESTEP 151644 / EPISODE 82 / bar1_score 9 / bar2_score 12 / REWARD -1 / Q_MAX 1.445696e+00
TIMESTEP 151774 / EPISODE 82 / bar1_score 10 / bar2_score 12 / REWARD 1 / Q_MAX 2.929018e+00
TIMESTEP 152155 / EPISODE 82 / bar1_score 11 / bar2_score 12 / REWARD 1 / Q_MAX 2.862700e+00
TIMESTEP 152536 / EPISODE 82 / bar1_score 12 / bar2_score 12 / REWARD 1 / Q_MAX 2.770138e+00
TIMESTEP 152665 / EPISODE 82 / bar1_score 12 / bar2_score 13 / REWARD -1 / Q_MAX 9.418316e-01
TIMESTEP 152711 / EPISODE 82 / bar1_score 12 / bar2_score 14 / REWARD -1 / Q_MAX 3.595563e-01
TIMESTEP 152757 / EPISODE 82 / bar1_score 12 / bar2_score 15 / REWARD -1 / Q_MAX 8.426490e-01
TIMESTEP 152886 / EPISODE 82 / bar1_score 13 / bar2_score 15 / REWARD 1 / Q_MAX 2.786442e+00
TIMESTEP 153015 / EPISODE 82 / bar1_score 13 / bar2_score 16 / REWAR

TIMESTEP 163253 / EPISODE 85 / bar1_score 4 / bar2_score 7 / REWARD 1 / Q_MAX 2.935690e+00
TIMESTEP 163382 / EPISODE 85 / bar1_score 4 / bar2_score 8 / REWARD -1 / Q_MAX 8.586147e-01
TIMESTEP 163513 / EPISODE 85 / bar1_score 5 / bar2_score 8 / REWARD 1 / Q_MAX 2.792222e+00
TIMESTEP 163894 / EPISODE 85 / bar1_score 6 / bar2_score 8 / REWARD 1 / Q_MAX 2.716476e+00
TIMESTEP 164023 / EPISODE 85 / bar1_score 6 / bar2_score 9 / REWARD -1 / Q_MAX 5.653787e-01
TIMESTEP 164069 / EPISODE 85 / bar1_score 6 / bar2_score 10 / REWARD -1 / Q_MAX 1.598084e+00
TIMESTEP 164368 / EPISODE 85 / bar1_score 7 / bar2_score 10 / REWARD 1 / Q_MAX 2.640782e+00
TIMESTEP 164497 / EPISODE 85 / bar1_score 7 / bar2_score 11 / REWARD -1 / Q_MAX 9.957752e-01
TIMESTEP 164626 / EPISODE 85 / bar1_score 8 / bar2_score 11 / REWARD 1 / Q_MAX 3.005620e+00
TIMESTEP 164755 / EPISODE 85 / bar1_score 8 / bar2_score 12 / REWARD -1 / Q_MAX 1.040859e+00
TIMESTEP 164801 / EPISODE 85 / bar1_score 8 / bar2_score 13 / REWARD -1 / Q_MAX 

TIMESTEP 177231 / EPISODE 88 / bar1_score 0 / bar2_score 0 / REWARD 1 / Q_MAX 2.367148e+00
TIMESTEP 177528 / EPISODE 88 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 2.998556e-01
TIMESTEP 177829 / EPISODE 88 / bar1_score 1 / bar2_score 1 / REWARD 1 / Q_MAX 2.579422e+00
TIMESTEP 177958 / EPISODE 88 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_MAX 1.274708e+00
TIMESTEP 178087 / EPISODE 88 / bar1_score 2 / bar2_score 2 / REWARD 1 / Q_MAX 3.149834e+00
TIMESTEP 178216 / EPISODE 88 / bar1_score 2 / bar2_score 3 / REWARD -1 / Q_MAX 2.032910e+00
TIMESTEP 178262 / EPISODE 88 / bar1_score 2 / bar2_score 4 / REWARD -1 / Q_MAX 1.611176e+00
TIMESTEP 178308 / EPISODE 88 / bar1_score 2 / bar2_score 5 / REWARD -1 / Q_MAX 1.840687e+00
TIMESTEP 178354 / EPISODE 88 / bar1_score 2 / bar2_score 6 / REWARD -1 / Q_MAX 1.586629e+00
TIMESTEP 178400 / EPISODE 88 / bar1_score 2 / bar2_score 7 / REWARD -1 / Q_MAX 1.595152e+00
TIMESTEP 178529 / EPISODE 88 / bar1_score 3 / bar2_score 7 / REWARD 1 / Q_MAX 3.051

TIMESTEP 190267 / EPISODE 90 / bar1_score 9 / bar2_score 14 / REWARD -1 / Q_MAX 5.270506e-01
TIMESTEP 190396 / EPISODE 90 / bar1_score 10 / bar2_score 14 / REWARD 1 / Q_MAX 2.786563e+00
TIMESTEP 190777 / EPISODE 90 / bar1_score 11 / bar2_score 14 / REWARD 1 / Q_MAX 2.709008e+00
TIMESTEP 190906 / EPISODE 90 / bar1_score 11 / bar2_score 15 / REWARD -1 / Q_MAX 5.257683e-01
TIMESTEP 191035 / EPISODE 90 / bar1_score 12 / bar2_score 15 / REWARD 1 / Q_MAX 2.908283e+00
TIMESTEP 191164 / EPISODE 90 / bar1_score 12 / bar2_score 16 / REWARD -1 / Q_MAX 7.885819e-01
TIMESTEP 191295 / EPISODE 90 / bar1_score 13 / bar2_score 16 / REWARD 1 / Q_MAX 2.789431e+00
TIMESTEP 191424 / EPISODE 90 / bar1_score 13 / bar2_score 17 / REWARD -1 / Q_MAX 3.882473e-01
TIMESTEP 191553 / EPISODE 90 / bar1_score 14 / bar2_score 17 / REWARD 1 / Q_MAX 2.749007e+00
TIMESTEP 191934 / EPISODE 90 / bar1_score 15 / bar2_score 17 / REWARD 1 / Q_MAX 2.704824e+00
TIMESTEP 192232 / EPISODE 90 / bar1_score 15 / bar2_score 18 / REWA

TIMESTEP 204227 / EPISODE 93 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX 6.743733e-01
TIMESTEP 204356 / EPISODE 93 / bar1_score 3 / bar2_score 2 / REWARD 1 / Q_MAX 2.739064e+00
TIMESTEP 204485 / EPISODE 93 / bar1_score 3 / bar2_score 3 / REWARD -1 / Q_MAX 7.647310e-01
TIMESTEP 204531 / EPISODE 93 / bar1_score 3 / bar2_score 4 / REWARD -1 / Q_MAX 5.695748e-01
TIMESTEP 204577 / EPISODE 93 / bar1_score 3 / bar2_score 5 / REWARD -1 / Q_MAX 6.426992e-01
TIMESTEP 204706 / EPISODE 93 / bar1_score 4 / bar2_score 5 / REWARD 1 / Q_MAX 2.540017e+00
TIMESTEP 204835 / EPISODE 93 / bar1_score 4 / bar2_score 6 / REWARD -1 / Q_MAX 1.400559e+00
TIMESTEP 204881 / EPISODE 93 / bar1_score 4 / bar2_score 7 / REWARD -1 / Q_MAX 5.996598e-01
TIMESTEP 204927 / EPISODE 93 / bar1_score 4 / bar2_score 8 / REWARD -1 / Q_MAX 5.765226e-01
TIMESTEP 205056 / EPISODE 93 / bar1_score 5 / bar2_score 8 / REWARD 1 / Q_MAX 2.636328e+00
TIMESTEP 205437 / EPISODE 93 / bar1_score 6 / bar2_score 8 / REWARD 1 / Q_MAX 2.687