In [1]:
import gym
import random
import numpy as np
import tensorflow as tf
from collections import deque
from skimage.color import rgb2gray
from skimage.transform import resize
from keras.models import Model
from keras.optimizers import RMSprop
from keras.layers import Input, Dense, Flatten, Lambda, merge
from keras.layers.convolutional import Conv2D
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# The Deterministic-v4 version of the Atari environments is exactly equivalent to what DeepMind used in their paper.
# Specifically, "-v4" essentially implements frame skipping so that the agent acts on every 4th frame. 
# This reduces the number of frames to process and thus reduces training time 
# without sacrificing agent's game performance.
env = gym.make('BreakoutDeterministic-v4')

In [3]:
EPISODES = 50000
MODEL_PATH = "model/breakout_dueling_ddqn.h5"
TENSORBOARD_LOG_PATH = 'summary/breakout_dueling_ddqn'
MODEL_CHECKPOINT_FREQUENCY = 100

# actions:
NO_ACTION_NO_BALL = 0
NO_ACTION = 1
MOVE_RIGHT = 2
MOVE_LEFT = 3

In [4]:
# 210x160x3 (RGB color) -> 84x84 (grayscale)
# also transforms floats to 8-bit ints to conserve memory
def preprocess(observed_state):
    return np.uint8(resize(rgb2gray(observed_state), (84, 84), mode='constant') * 255)

In [5]:
def get_initial_state_history(observed_state):
    # No preceding frames at the start of an episode, so just copy 4 times
    state = preprocess(observed_state)
    history = np.stack((state, state, state, state), axis=2)
    history = np.reshape([history], (1, 84, 84, 4))
    return state, history

In [None]:
class DuelingDDQNAgent:
    def __init__(self, action_size):
        self.render = False
        self.load_model = False

        # environment settings
        self.state_size = (84, 84, 4)
        self.action_size = action_size

        # epsilon-greedy policy parameters
        self.epsilon = 1.
        self.epsilon_start, self.epsilon_end = 1.0, 0.1
        self.exploration_steps = 1000000.
        self.epsilon_decay_step = (self.epsilon_start - self.epsilon_end) / self.exploration_steps

        # training parameters
        self.batch_size = 32
        self.train_start = 50000
        self.update_target_rate = 10000
        self.discount_factor = 0.99
        self.memory = deque(maxlen=400000)
        self.no_op_steps = 30

        # build
        self.model = self.build_model()
        self.target_model = self.build_model()
        self.update_target_model()

        self.optimizer = self.optimizer()

        self.sess = tf.InteractiveSession()
        K.set_session(self.sess)

        self.avg_q_max, self.avg_loss = 0, 0
        self.summary_placeholders, self.update_ops, self.summary_op = self.setup_summary()
        self.summary_writer = tf.summary.FileWriter(TENSORBOARD_LOG_PATH, self.sess.graph)
        self.sess.run(tf.global_variables_initializer())

        if self.load_model:
            self.model.load_weights(MODEL_PATH)

    # if error is in [-1; 1] cost is quadratic to error, otherwise – linear
    def optimizer(self):
        a = K.placeholder(shape=(None,), dtype='int32')
        y = K.placeholder(shape=(None,), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # frame representation -> CNN -> advantage/value streams -> merge two streams -> Q-value of each action
    def build_model(self):
        input = Input(shape=self.state_size)
        shared = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')(input)
        shared = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')(shared)
        shared = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')(shared)
        flatten = Flatten()(shared)

        # stream that estimates state-dependent action advantages
        advantage_fc = Dense(512, activation='relu')(flatten)
        advantage = Dense(self.action_size)(advantage_fc)
        advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                           output_shape=(self.action_size,))(advantage)

        # stream which estimates state values
        value_fc = Dense(512, activation='relu')(flatten)
        value = Dense(1)(value_fc)
        value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                       output_shape=(self.action_size,))(value)

        # merge two streams to produce Q-value
        q_value = merge([value, advantage], mode='sum')
        model = Model(inputs=input, outputs=q_value)
        model.summary()

        return model

    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())

    # epsilon-greedy policy
    def get_action(self, history):
        history = np.float32(history / 255.0)
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model.predict(history)
            return np.argmax(q_value[0])

    # save sample <s, a, r, s'> to the replay memory
    def replay_memory(self, history, action, reward, next_history, dead):
        self.memory.append((history, action, reward, next_history, dead))

    # sample from replay memory (batches of specified size)
    def train_replay(self):
        if len(self.memory) < self.train_start:
            return
        if self.epsilon > self.epsilon_end:
            self.epsilon -= self.epsilon_decay_step

        mini_batch = random.sample(self.memory, self.batch_size)

        history = np.zeros((self.batch_size, self.state_size[0],
                            self.state_size[1], self.state_size[2]))
        next_history = np.zeros((self.batch_size, self.state_size[0],
                                 self.state_size[1], self.state_size[2]))
        target = np.zeros((self.batch_size,))
        action, reward, dead = [], [], []

        for i in range(self.batch_size):
            history[i] = np.float32(mini_batch[i][0] / 255.)
            next_history[i] = np.float32(mini_batch[i][3] / 255.)
            action.append(mini_batch[i][1])
            reward.append(mini_batch[i][2])
            dead.append(mini_batch[i][4])

        value = self.model.predict(history)
        target_value = self.target_model.predict(next_history)

        # get max Q-value at s' from target model
        for i in range(self.batch_size):
            if dead[i]:
                target[i] = reward[i]
            else:
                # key DDQN idea: select action from model, update value from target model
                target[i] = reward[i] + self.discount_factor * target_value[i][np.argmax(value[i])]

        loss = self.optimizer([history, action, target])
        self.avg_loss += loss[0]

    def setup_summary(self):
        episode_total_reward = tf.Variable(0.)
        episode_avg_max_q = tf.Variable(0.)
        episode_duration = tf.Variable(0.)
        episode_avg_loss = tf.Variable(0.)

        tf.summary.scalar('Total Reward/Episode', episode_total_reward)
        tf.summary.scalar('Average Max Q/Episode', episode_avg_max_q)
        tf.summary.scalar('Duration/Episode', episode_duration)
        tf.summary.scalar('Average Loss/Episode', episode_avg_loss)

        summary_vars = [episode_total_reward, episode_avg_max_q,
                        episode_duration, episode_avg_loss]
        summary_placeholders = [tf.placeholder(tf.float32) for _ in
                                range(len(summary_vars))]
        update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in
                      range(len(summary_vars))]
        summary_op = tf.summary.merge_all()
        return summary_placeholders, update_ops, summary_op

In [None]:
agent = DuelingDDQNAgent(action_size=3)

scores, episodes, global_step = [], [], 0

for e in range(EPISODES):
    done = False
    dead = False
    # 1 episode = 5 lives
    step, score, start_life = 0, 0, 5
    observed_state = env.reset()

    # One of the ideas from DeepMind's papers: don't take any action for
    # some random number of steps at the start of an episode
    for _ in range(random.randint(1, agent.no_op_steps)):
        observed_state, _, _, _ = env.step(NO_ACTION)

    state, history = get_initial_state_history(observed_state)

    while not done:
        if agent.render:
            env.render()
        global_step += 1
        step += 1

        # get action for the current history
        action = agent.get_action(history)

        # model returns one of the three actions (from [0; 2]),
        # but 'real' actions are 1 through 3, because 0 is no-op (no action, no ball)
        real_action = action + 1

        observed_state, reward, done, info = env.step(real_action)

        # build next state / history
        next_state = preprocess(observed_state)
        next_state = np.reshape([next_state], (1, 84, 84, 1))
        next_history = np.append(next_state, history[:, :, :, :3], axis=3)

        agent.avg_q_max += np.amax(
            agent.model.predict(np.float32(history / 255.))[0])

        # check if the agent is 'dead' and update remaining lives
        if start_life > info['ale.lives']:
            dead = True
            start_life = info['ale.lives']

        reward = np.clip(reward, -1., 1.)

        # save the sample <s, a, r, s', d> to the replay memory and train model
        agent.replay_memory(history, action, reward, next_history, dead)
        agent.train_replay()

        # every once in a while, update the target model
        if global_step % agent.update_target_rate == 0:
            agent.update_target_model()

        score += reward

        if dead:
            dead = False

            # reset history
            _, history = get_initial_state_history(observed_state)
        else:
            history = next_history

        # log progress to console and TensorBoard on episode end
        if done:
            if global_step > agent.train_start:
                stats = [score, agent.avg_q_max / float(step), step, agent.avg_loss / float(step)]
                for i in range(len(stats)):
                    agent.sess.run(agent.update_ops[i], feed_dict={
                        agent.summary_placeholders[i]: float(stats[i])
                    })
                summary_str = agent.sess.run(agent.summary_op)
                agent.summary_writer.add_summary(summary_str, e + 1)

            print("episode:", e, "  score:", score, "  memory length:",
                    len(agent.memory), "  epsilon:", agent.epsilon,
                    "  global_step:", global_step, "  average_q:",
                    agent.avg_q_max / float(step), "  average loss:",
                    agent.avg_loss / float(step))

            agent.avg_q_max, agent.avg_loss = 0, 0

    if e % MODEL_CHECKPOINT_FREQUENCY == 0:
        agent.model.save_weights(MODEL_PATH)

  name=name)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 84, 84, 4)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 20, 20, 32)   8224        input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 9, 9, 64)     32832       conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 7, 7, 64)     36928       conv2d_2[0][0]                   
__________________________________________________________________________________________________
flatten_1 

episode: 15   score: 1.0   memory length: 2935   epsilon: 1.0   global_step: 2935   average_q: 0.01634719452775288   average loss: 0.0
episode: 16   score: 1.0   memory length: 3103   epsilon: 1.0   global_step: 3103   average_q: 0.01888675812543148   average loss: 0.0
episode: 17   score: 1.0   memory length: 3251   epsilon: 1.0   global_step: 3251   average_q: 0.01846491955094845   average loss: 0.0
episode: 18   score: 1.0   memory length: 3420   epsilon: 1.0   global_step: 3420   average_q: 0.018748351802619603   average loss: 0.0
episode: 19   score: 1.0   memory length: 3592   epsilon: 1.0   global_step: 3592   average_q: 0.017118361746069302   average loss: 0.0
episode: 20   score: 0.0   memory length: 3714   epsilon: 1.0   global_step: 3714   average_q: 0.01637965846104456   average loss: 0.0
episode: 21   score: 2.0   memory length: 3904   epsilon: 1.0   global_step: 3904   average_q: 0.02081153277111681   average loss: 0.0
episode: 22   score: 0.0   memory length: 4010   epsi

episode: 76   score: 2.0   memory length: 12804   epsilon: 1.0   global_step: 12804   average_q: 0.020354713350965108   average loss: 0.0
episode: 77   score: 0.0   memory length: 12897   epsilon: 1.0   global_step: 12897   average_q: 0.015966180822141067   average loss: 0.0
episode: 78   score: 0.0   memory length: 13003   epsilon: 1.0   global_step: 13003   average_q: 0.015659592070458632   average loss: 0.0
episode: 79   score: 1.0   memory length: 13150   epsilon: 1.0   global_step: 13150   average_q: 0.020116582392480504   average loss: 0.0
episode: 80   score: 1.0   memory length: 13295   epsilon: 1.0   global_step: 13295   average_q: 0.019339052292293515   average loss: 0.0
episode: 81   score: 2.0   memory length: 13497   epsilon: 1.0   global_step: 13497   average_q: 0.020629665330236795   average loss: 0.0
episode: 82   score: 1.0   memory length: 13619   epsilon: 1.0   global_step: 13619   average_q: 0.018081292624539527   average loss: 0.0
episode: 83   score: 2.0   memory 

episode: 136   score: 1.0   memory length: 21831   epsilon: 1.0   global_step: 21831   average_q: 0.017232694335538767   average loss: 0.0
episode: 137   score: 1.0   memory length: 21987   epsilon: 1.0   global_step: 21987   average_q: 0.019207610569607753   average loss: 0.0
episode: 138   score: 0.0   memory length: 22111   epsilon: 1.0   global_step: 22111   average_q: 0.016169302030316284   average loss: 0.0
episode: 139   score: 0.0   memory length: 22252   epsilon: 1.0   global_step: 22252   average_q: 0.015783355888375577   average loss: 0.0
episode: 140   score: 1.0   memory length: 22422   epsilon: 1.0   global_step: 22422   average_q: 0.018725748316330067   average loss: 0.0
episode: 141   score: 0.0   memory length: 22543   epsilon: 1.0   global_step: 22543   average_q: 0.016336081526434617   average loss: 0.0
episode: 142   score: 0.0   memory length: 22669   epsilon: 1.0   global_step: 22669   average_q: 0.016110043233585735   average loss: 0.0
episode: 143   score: 2.0  

episode: 196   score: 0.0   memory length: 30715   epsilon: 1.0   global_step: 30715   average_q: 0.016566293314099312   average loss: 0.0
episode: 197   score: 1.0   memory length: 30867   epsilon: 1.0   global_step: 30867   average_q: 0.01665432647017664   average loss: 0.0
episode: 198   score: 3.0   memory length: 31108   epsilon: 1.0   global_step: 31108   average_q: 0.018049749750566682   average loss: 0.0
episode: 199   score: 1.0   memory length: 31246   epsilon: 1.0   global_step: 31246   average_q: 0.017881950059824663   average loss: 0.0
episode: 200   score: 1.0   memory length: 31397   epsilon: 1.0   global_step: 31397   average_q: 0.017950793376673532   average loss: 0.0
episode: 201   score: 0.0   memory length: 31515   epsilon: 1.0   global_step: 31515   average_q: 0.01597705511060559   average loss: 0.0
episode: 202   score: 1.0   memory length: 31665   epsilon: 1.0   global_step: 31665   average_q: 0.016487601411839328   average loss: 0.0
episode: 203   score: 1.0   m

episode: 256   score: 0.0   memory length: 40055   epsilon: 1.0   global_step: 40055   average_q: 0.015557874596732505   average loss: 0.0
episode: 257   score: 1.0   memory length: 40214   epsilon: 1.0   global_step: 40214   average_q: 0.01898442137306966   average loss: 0.0
episode: 258   score: 1.0   memory length: 40368   epsilon: 1.0   global_step: 40368   average_q: 0.019320298754307744   average loss: 0.0
episode: 259   score: 3.0   memory length: 40585   epsilon: 1.0   global_step: 40585   average_q: 0.022184643800014173   average loss: 0.0
episode: 260   score: 1.0   memory length: 40756   epsilon: 1.0   global_step: 40756   average_q: 0.01687282470888213   average loss: 0.0
episode: 261   score: 0.0   memory length: 40869   epsilon: 1.0   global_step: 40869   average_q: 0.016091805190441354   average loss: 0.0
episode: 262   score: 0.0   memory length: 40983   epsilon: 1.0   global_step: 40983   average_q: 0.016189131068817357   average loss: 0.0
episode: 263   score: 0.0   m

episode: 316   score: 0.0   memory length: 49910   epsilon: 1.0   global_step: 49910   average_q: 0.015905217609542888   average loss: 0.0
episode: 317   score: 2.0   memory length: 50089   epsilon: 0.9999190000000027   global_step: 50089   average_q: 0.021856125386941698   average loss: 0.0018296426047608642
episode: 318   score: 0.0   memory length: 50206   epsilon: 0.9998137000000061   global_step: 50206   average_q: 0.0143027257007093   average loss: 0.004081033620493075
episode: 319   score: 2.0   memory length: 50408   epsilon: 0.9996319000000121   global_step: 50408   average_q: 0.018244516939537988   average loss: 0.003518002555444969
episode: 320   score: 2.0   memory length: 50592   epsilon: 0.9994663000000176   global_step: 50592   average_q: 0.010195150398193737   average loss: 0.0031129499546631964
episode: 321   score: 3.0   memory length: 50801   epsilon: 0.9992782000000238   global_step: 50801   average_q: 0.02228063443657813   average loss: 0.0038709921726524446
episod

episode: 365   score: 1.0   memory length: 57556   epsilon: 0.9931987000002239   global_step: 57556   average_q: 0.01229332838151974   average loss: 0.0037622524616369225
episode: 366   score: 2.0   memory length: 57774   epsilon: 0.9930025000002304   global_step: 57774   average_q: 0.01139056154872812   average loss: 0.004015934646233307
episode: 367   score: 0.0   memory length: 57886   epsilon: 0.9929017000002337   global_step: 57886   average_q: 0.009165222754485771   average loss: 0.0036371082401720806
episode: 368   score: 0.0   memory length: 57994   epsilon: 0.9928045000002369   global_step: 57994   average_q: 0.01780799510931872   average loss: 0.0035700932167957136
episode: 369   score: 1.0   memory length: 58136   epsilon: 0.9926767000002411   global_step: 58136   average_q: 0.01991391314265751   average loss: 0.00378678351617823
episode: 370   score: 1.0   memory length: 58302   epsilon: 0.992527300000246   global_step: 58302   average_q: 0.008140197051801237   average loss

episode: 414   score: 0.0   memory length: 65405   epsilon: 0.9861346000004565   global_step: 65405   average_q: 0.039312979491712814   average loss: 0.0026609791171837175
episode: 415   score: 1.0   memory length: 65558   epsilon: 0.985996900000461   global_step: 65558   average_q: 0.04367379009041911   average loss: 0.00415717524001712
episode: 416   score: 0.0   memory length: 65662   epsilon: 0.9859033000004641   global_step: 65662   average_q: 0.039271417557476804   average loss: 0.003595079528185334
episode: 417   score: 0.0   memory length: 65777   epsilon: 0.9857998000004675   global_step: 65777   average_q: 0.03903011501483295   average loss: 0.0033795891109904283
episode: 418   score: 4.0   memory length: 66031   epsilon: 0.985571200000475   global_step: 66031   average_q: 0.04650260275596474   average loss: 0.004278366765768044
episode: 419   score: 1.0   memory length: 66160   epsilon: 0.9854551000004789   global_step: 66160   average_q: 0.0380969803866952   average loss: 0

episode: 463   score: 1.0   memory length: 73620   epsilon: 0.9787411000006999   global_step: 73620   average_q: 0.041600639958092024   average loss: 0.004153517819259564
episode: 464   score: 1.0   memory length: 73766   epsilon: 0.9786097000007042   global_step: 73766   average_q: 0.03920690493326481   average loss: 0.0028784748983837257
episode: 465   score: 1.0   memory length: 73934   epsilon: 0.9784585000007092   global_step: 73934   average_q: 0.04547955918436249   average loss: 0.003156822261682161
episode: 466   score: 2.0   memory length: 74107   epsilon: 0.9783028000007143   global_step: 74107   average_q: 0.04799176868848029   average loss: 0.0038659792655822045
episode: 467   score: 1.0   memory length: 74236   epsilon: 0.9781867000007182   global_step: 74236   average_q: 0.04318736843013948   average loss: 0.0038556620522758184
episode: 468   score: 2.0   memory length: 74417   epsilon: 0.9780238000007235   global_step: 74417   average_q: 0.048334973293114765   average lo

episode: 512   score: 2.0   memory length: 81532   epsilon: 0.9716203000009344   global_step: 81532   average_q: 0.057464741294582684   average loss: 0.004487998490881615
episode: 513   score: 0.0   memory length: 81640   epsilon: 0.9715231000009376   global_step: 81640   average_q: 0.058192658238112926   average loss: 0.0051360421823658355
episode: 514   score: 0.0   memory length: 81756   epsilon: 0.971418700000941   global_step: 81756   average_q: 0.052632039123824956   average loss: 0.0025706429016226734
episode: 515   score: 1.0   memory length: 81923   epsilon: 0.9712684000009459   global_step: 81923   average_q: 0.0595261381414836   average loss: 0.004550553121061204
episode: 516   score: 1.0   memory length: 82070   epsilon: 0.9711361000009503   global_step: 82070   average_q: 0.05538965520911476   average loss: 0.0025430392226838128
episode: 517   score: 4.0   memory length: 82341   epsilon: 0.9708922000009583   global_step: 82341   average_q: 0.06194338465033862   average los

episode: 561   score: 0.0   memory length: 88865   epsilon: 0.9650206000011516   global_step: 88865   average_q: 0.05536641751967587   average loss: 0.0034514669912876573
episode: 562   score: 1.0   memory length: 89024   epsilon: 0.9648775000011564   global_step: 89024   average_q: 0.05579925541701557   average loss: 0.003885818617589897
episode: 563   score: 1.0   memory length: 89160   epsilon: 0.9647551000011604   global_step: 89160   average_q: 0.05766441755215911   average loss: 0.003103712202052039
episode: 564   score: 0.0   memory length: 89281   epsilon: 0.964646200001164   global_step: 89281   average_q: 0.05673269080844792   average loss: 0.004079021753835503
episode: 565   score: 1.0   memory length: 89445   epsilon: 0.9644986000011688   global_step: 89445   average_q: 0.05493334489988118   average loss: 0.002958989628864845
episode: 566   score: 2.0   memory length: 89620   epsilon: 0.964341100001174   global_step: 89620   average_q: 0.06010672145656177   average loss: 0.

episode: 610   score: 2.0   memory length: 96504   epsilon: 0.958145500001378   global_step: 96504   average_q: 0.06102998084026182   average loss: 0.003136205572783417
episode: 611   score: 1.0   memory length: 96660   epsilon: 0.9580051000013826   global_step: 96660   average_q: 0.05582099107022469   average loss: 0.00265655869409679
episode: 612   score: 0.0   memory length: 96757   epsilon: 0.9579178000013855   global_step: 96757   average_q: 0.05970668208967779   average loss: 0.004253144672430636
episode: 613   score: 3.0   memory length: 97007   epsilon: 0.9576928000013929   global_step: 97007   average_q: 0.06326970072090626   average loss: 0.0034945875315788725
episode: 614   score: 4.0   memory length: 97300   epsilon: 0.9574291000014016   global_step: 97300   average_q: 0.063687556534497   average loss: 0.0034960773821263134
episode: 615   score: 1.0   memory length: 97458   epsilon: 0.9572869000014063   global_step: 97458   average_q: 0.05890905253494842   average loss: 0.0

episode: 658   score: 2.0   memory length: 104409   epsilon: 0.9510310000016122   global_step: 104409   average_q: 0.06433820543311984   average loss: 0.003462025605700258
episode: 659   score: 0.0   memory length: 104540   epsilon: 0.9509131000016161   global_step: 104540   average_q: 0.056061556577   average loss: 0.001776376214303398
episode: 660   score: 0.0   memory length: 104651   epsilon: 0.9508132000016194   global_step: 104651   average_q: 0.06358223787701882   average loss: 0.003303701550169183
episode: 661   score: 1.0   memory length: 104826   epsilon: 0.9506557000016246   global_step: 104826   average_q: 0.06395443614040103   average loss: 0.0032459614501645514
episode: 662   score: 1.0   memory length: 104984   epsilon: 0.9505135000016293   global_step: 104984   average_q: 0.06732767183758036   average loss: 0.0032068188109469356
episode: 663   score: 2.0   memory length: 105164   epsilon: 0.9503515000016346   global_step: 105164   average_q: 0.06400303012794918   averag

episode: 706   score: 1.0   memory length: 112197   epsilon: 0.944021800001843   global_step: 112197   average_q: 0.07075048632742394   average loss: 0.004437886330855517
episode: 707   score: 0.0   memory length: 112304   epsilon: 0.9439255000018462   global_step: 112304   average_q: 0.0628369510452324   average loss: 0.002698787718493966
episode: 708   score: 0.0   memory length: 112432   epsilon: 0.94381030000185   global_step: 112432   average_q: 0.06496147005236708   average loss: 0.0036157258597313557
episode: 709   score: 2.0   memory length: 112624   epsilon: 0.9436375000018556   global_step: 112624   average_q: 0.06543223328966026   average loss: 0.0030554333480902565
episode: 710   score: 0.0   memory length: 112736   epsilon: 0.943536700001859   global_step: 112736   average_q: 0.06331246213189193   average loss: 0.002970628464863369
episode: 711   score: 0.0   memory length: 112858   epsilon: 0.9434269000018626   global_step: 112858   average_q: 0.06247867509478428   averag

episode: 754   score: 3.0   memory length: 119977   epsilon: 0.9370198000020735   global_step: 119977   average_q: 0.07833559662430432   average loss: 0.003961841291939301
episode: 755   score: 1.0   memory length: 120109   epsilon: 0.9369010000020774   global_step: 120109   average_q: 0.06102054374236049   average loss: 0.0023407320975882776
episode: 756   score: 2.0   memory length: 120292   epsilon: 0.9367363000020829   global_step: 120292   average_q: 0.06486852432453567   average loss: 0.0035708951739182792
episode: 757   score: 0.0   memory length: 120410   epsilon: 0.9366301000020864   global_step: 120410   average_q: 0.05955499233835834   average loss: 0.0031307061003837465
episode: 758   score: 0.0   memory length: 120516   epsilon: 0.9365347000020895   global_step: 120516   average_q: 0.059300981954021274   average loss: 0.0025632975725098078
episode: 759   score: 1.0   memory length: 120651   epsilon: 0.9364132000020935   global_step: 120651   average_q: 0.0630414164452641  

episode: 802   score: 1.0   memory length: 127613   epsilon: 0.9301474000022998   global_step: 127613   average_q: 0.06466904493129771   average loss: 0.0037500231831625297
episode: 803   score: 0.0   memory length: 127715   epsilon: 0.9300556000023028   global_step: 127715   average_q: 0.06206254993437552   average loss: 0.0034810930740372986
episode: 804   score: 0.0   memory length: 127845   epsilon: 0.9299386000023067   global_step: 127845   average_q: 0.06064680943695398   average loss: 0.003478324413810277
episode: 805   score: 0.0   memory length: 127970   epsilon: 0.9298261000023104   global_step: 127970   average_q: 0.060487965762615205   average loss: 0.0025367232831704314
episode: 806   score: 2.0   memory length: 128179   epsilon: 0.9296380000023166   global_step: 128179   average_q: 0.07135184832879801   average loss: 0.0035152746791854124
episode: 807   score: 0.0   memory length: 128300   epsilon: 0.9295291000023201   global_step: 128300   average_q: 0.06086344336552068 

episode: 850   score: 0.0   memory length: 135424   epsilon: 0.9231175000025312   global_step: 135424   average_q: 0.05984358053787479   average loss: 0.00292649145481911
episode: 851   score: 0.0   memory length: 135529   epsilon: 0.9230230000025343   global_step: 135529   average_q: 0.06232270403277306   average loss: 0.003916554103792199
episode: 852   score: 1.0   memory length: 135671   epsilon: 0.9228952000025386   global_step: 135671   average_q: 0.06188945776559937   average loss: 0.0029683316623011474
episode: 853   score: 0.0   memory length: 135769   epsilon: 0.9228070000025415   global_step: 135769   average_q: 0.05889395905696616   average loss: 0.002565694814045261
episode: 854   score: 1.0   memory length: 135913   epsilon: 0.9226774000025457   global_step: 135913   average_q: 0.06559612671844661   average loss: 0.002637572894684581
episode: 855   score: 2.0   memory length: 136136   epsilon: 0.9224767000025523   global_step: 136136   average_q: 0.0672643568717578   aver

episode: 898   score: 1.0   memory length: 143082   epsilon: 0.9162253000027581   global_step: 143082   average_q: 0.09318317340221256   average loss: 0.0024836421426641664
episode: 899   score: 1.0   memory length: 143215   epsilon: 0.9161056000027621   global_step: 143215   average_q: 0.0905237281624984   average loss: 0.0028471368988451794
episode: 900   score: 0.0   memory length: 143316   epsilon: 0.9160147000027651   global_step: 143316   average_q: 0.08983932627309667   average loss: 0.0026870578880658683
episode: 901   score: 1.0   memory length: 143449   epsilon: 0.915895000002769   global_step: 143449   average_q: 0.08576686554273269   average loss: 0.0030279409671917315
episode: 902   score: 2.0   memory length: 143643   epsilon: 0.9157204000027748   global_step: 143643   average_q: 0.09426512196660042   average loss: 0.0024760032053198246
episode: 903   score: 0.0   memory length: 143753   epsilon: 0.915621400002778   global_step: 143753   average_q: 0.08851404908028516   a

episode: 946   score: 2.0   memory length: 150606   epsilon: 0.9094537000029811   global_step: 150606   average_q: 0.0931417101967162   average loss: 0.00287940913188142
episode: 947   score: 0.0   memory length: 150723   epsilon: 0.9093484000029846   global_step: 150723   average_q: 0.08738089282797952   average loss: 0.002208877601776457
episode: 948   score: 0.0   memory length: 150821   epsilon: 0.9092602000029875   global_step: 150821   average_q: 0.09033551720940337   average loss: 0.0037025383803758853
episode: 949   score: 0.0   memory length: 150952   epsilon: 0.9091423000029913   global_step: 150952   average_q: 0.0879729867891501   average loss: 0.0028737279342763273
episode: 950   score: 1.0   memory length: 151097   epsilon: 0.9090118000029956   global_step: 151097   average_q: 0.09052319578055677   average loss: 0.002630196502637961
episode: 951   score: 2.0   memory length: 151293   epsilon: 0.9088354000030014   global_step: 151293   average_q: 0.09242806133186938   aver

episode: 994   score: 0.0   memory length: 158081   epsilon: 0.9027262000032026   global_step: 158081   average_q: 0.0845066647976637   average loss: 0.0025147184781720475
episode: 995   score: 0.0   memory length: 158183   epsilon: 0.9026344000032056   global_step: 158183   average_q: 0.0852107876802192   average loss: 0.0019581745055606294
episode: 996   score: 1.0   memory length: 158347   epsilon: 0.9024868000032105   global_step: 158347   average_q: 0.10838985397684865   average loss: 0.0018564442081652687
episode: 997   score: 3.0   memory length: 158565   epsilon: 0.9022906000032169   global_step: 158565   average_q: 0.11775455070198129   average loss: 0.002331472047536333
episode: 998   score: 1.0   memory length: 158708   epsilon: 0.9021619000032212   global_step: 158708   average_q: 0.07581625607880679   average loss: 0.001747396449232596
episode: 999   score: 2.0   memory length: 158924   epsilon: 0.9019675000032276   global_step: 158924   average_q: 0.11432534690808367   av

episode: 1042   score: 0.0   memory length: 165403   epsilon: 0.8961364000034195   global_step: 165403   average_q: 0.08666860150794188   average loss: 0.0022083110009740873
episode: 1043   score: 0.0   memory length: 165513   epsilon: 0.8960374000034228   global_step: 165513   average_q: 0.08701513219963421   average loss: 0.0023352368640189524
episode: 1044   score: 2.0   memory length: 165688   epsilon: 0.895879900003428   global_step: 165688   average_q: 0.11316700147730964   average loss: 0.0022760446430246314
episode: 1045   score: 0.0   memory length: 165789   epsilon: 0.895789000003431   global_step: 165789   average_q: 0.08455812554843355   average loss: 0.0017828450396372869
episode: 1046   score: 2.0   memory length: 166007   epsilon: 0.8955928000034374   global_step: 166007   average_q: 0.10386800191818027   average loss: 0.00195563465272953
episode: 1047   score: 4.0   memory length: 166304   epsilon: 0.8953255000034462   global_step: 166304   average_q: 0.1158021200285214

episode: 1090   score: 0.0   memory length: 172705   epsilon: 0.8895646000036359   global_step: 172705   average_q: 0.08014073872413391   average loss: 0.0024469040448214705
episode: 1091   score: 2.0   memory length: 172882   epsilon: 0.8894053000036412   global_step: 172882   average_q: 0.10218391062344535   average loss: 0.00183831178576011
episode: 1092   score: 4.0   memory length: 173172   epsilon: 0.8891443000036497   global_step: 173172   average_q: 0.10314184065008986   average loss: 0.0026048476204433143
episode: 1093   score: 0.0   memory length: 173271   epsilon: 0.8890552000036527   global_step: 173271   average_q: 0.07999991565340697   average loss: 0.0015513472038156863
episode: 1094   score: 3.0   memory length: 173519   epsilon: 0.88883200000366   global_step: 173519   average_q: 0.12145280082439704   average loss: 0.0023951300493118034
episode: 1095   score: 4.0   memory length: 173764   epsilon: 0.8886115000036673   global_step: 173764   average_q: 0.0939755417102453

episode: 1138   score: 2.0   memory length: 180670   epsilon: 0.8823961000038719   global_step: 180670   average_q: 0.11510636379836865   average loss: 0.0020963000522056493
episode: 1139   score: 1.0   memory length: 180845   epsilon: 0.8822386000038771   global_step: 180845   average_q: 0.11469243845769338   average loss: 0.002234188258818384
episode: 1140   score: 0.0   memory length: 180962   epsilon: 0.8821333000038806   global_step: 180962   average_q: 0.10818939598707053   average loss: 0.0018115646079788085
episode: 1141   score: 0.0   memory length: 181063   epsilon: 0.8820424000038836   global_step: 181063   average_q: 0.10970298134454406   average loss: 0.0024448443829248505
episode: 1142   score: 0.0   memory length: 181163   epsilon: 0.8819524000038865   global_step: 181163   average_q: 0.11281916432082653   average loss: 0.0018785541347097023
episode: 1143   score: 5.0   memory length: 181468   epsilon: 0.8816779000038956   global_step: 181468   average_q: 0.1369455754268

episode: 1186   score: 1.0   memory length: 188779   epsilon: 0.8750980000041122   global_step: 188779   average_q: 0.11327574290335178   average loss: 0.00123042424452251
episode: 1187   score: 0.0   memory length: 188894   epsilon: 0.8749945000041156   global_step: 188894   average_q: 0.10633158897576125   average loss: 0.0016242543820977596
episode: 1188   score: 0.0   memory length: 189010   epsilon: 0.874890100004119   global_step: 189010   average_q: 0.1068661511586658   average loss: 0.0015822552126776607
episode: 1189   score: 2.0   memory length: 189211   epsilon: 0.874709200004125   global_step: 189211   average_q: 0.12276606467455181   average loss: 0.002286234130939256
episode: 1190   score: 2.0   memory length: 189418   epsilon: 0.8745229000041311   global_step: 189418   average_q: 0.13919058296343553   average loss: 0.0020240070078893007
episode: 1191   score: 1.0   memory length: 189573   epsilon: 0.8743834000041357   global_step: 189573   average_q: 0.11660959619668222 

episode: 1234   score: 0.0   memory length: 197137   epsilon: 0.8675758000043599   global_step: 197137   average_q: 0.10192462082347299   average loss: 0.001529653420049695
episode: 1235   score: 3.0   memory length: 197404   epsilon: 0.8673355000043678   global_step: 197404   average_q: 0.1508116740803147   average loss: 0.0017039821616342127
episode: 1236   score: 1.0   memory length: 197556   epsilon: 0.8671987000043723   global_step: 197556   average_q: 0.12164669074608307   average loss: 0.0015100724078461337
episode: 1237   score: 0.0   memory length: 197676   epsilon: 0.8670907000043758   global_step: 197676   average_q: 0.10542892447362344   average loss: 0.002612569839736049
episode: 1238   score: 0.0   memory length: 197778   epsilon: 0.8669989000043788   global_step: 197778   average_q: 0.09980826547332838   average loss: 0.0016253116836214402
episode: 1239   score: 0.0   memory length: 197906   epsilon: 0.8668837000043826   global_step: 197906   average_q: 0.100956181093351

episode: 1282   score: 0.0   memory length: 204481   epsilon: 0.8609662000045775   global_step: 204481   average_q: 0.09596441243964482   average loss: 0.0016659041522087328
episode: 1283   score: 0.0   memory length: 204597   epsilon: 0.8608618000045809   global_step: 204597   average_q: 0.09732944235719483   average loss: 0.0018576725839199659
episode: 1284   score: 1.0   memory length: 204769   epsilon: 0.860707000004586   global_step: 204769   average_q: 0.1026681439387937   average loss: 0.0015953575240166894
episode: 1285   score: 3.0   memory length: 204988   epsilon: 0.8605099000045925   global_step: 204988   average_q: 0.15413640340849688   average loss: 0.0013396359873491979
episode: 1286   score: 1.0   memory length: 205137   epsilon: 0.8603758000045969   global_step: 205137   average_q: 0.12422949548235676   average loss: 0.0013896317247886541
episode: 1287   score: 1.0   memory length: 205295   epsilon: 0.8602336000046016   global_step: 205295   average_q: 0.13501022027546

episode: 1330   score: 3.0   memory length: 211894   epsilon: 0.8542945000047971   global_step: 211894   average_q: 0.16033045122536219   average loss: 0.0022068029808706784
episode: 1331   score: 3.0   memory length: 212113   epsilon: 0.8540974000048036   global_step: 212113   average_q: 0.12935311974081548   average loss: 0.002045610242772895
episode: 1332   score: 1.0   memory length: 212267   epsilon: 0.8539588000048082   global_step: 212267   average_q: 0.1524437265107771   average loss: 0.0018744014390455498
episode: 1333   score: 3.0   memory length: 212505   epsilon: 0.8537446000048152   global_step: 212505   average_q: 0.14578581620164277   average loss: 0.002088970075389067
episode: 1334   score: 2.0   memory length: 212684   epsilon: 0.8535835000048205   global_step: 212684   average_q: 0.13237524829680028   average loss: 0.001778367070378586
episode: 1335   score: 3.0   memory length: 212895   epsilon: 0.8533936000048268   global_step: 212895   average_q: 0.1497085159070683

episode: 1378   score: 1.0   memory length: 219953   epsilon: 0.8470414000050359   global_step: 219953   average_q: 0.11616454361692856   average loss: 0.0008501856948800866
episode: 1379   score: 1.0   memory length: 220116   epsilon: 0.8468947000050407   global_step: 220116   average_q: 0.10568708224804854   average loss: 0.0025285867042527063
episode: 1380   score: 0.0   memory length: 220224   epsilon: 0.846797500005044   global_step: 220224   average_q: 0.09305264918064629   average loss: 0.0026010483138465875
episode: 1381   score: 2.0   memory length: 220417   epsilon: 0.8466238000050497   global_step: 220417   average_q: 0.1494817938752125   average loss: 0.0025328753618695386
episode: 1382   score: 0.0   memory length: 220527   epsilon: 0.8465248000050529   global_step: 220527   average_q: 0.09511902000416408   average loss: 0.002322088308441876
episode: 1383   score: 2.0   memory length: 220725   epsilon: 0.8463466000050588   global_step: 220725   average_q: 0.138183300543313

episode: 1426   score: 2.0   memory length: 227382   epsilon: 0.840355300005256   global_step: 227382   average_q: 0.12034883131119577   average loss: 0.0012407978679770527
episode: 1427   score: 1.0   memory length: 227548   epsilon: 0.840205900005261   global_step: 227548   average_q: 0.10533111971096103   average loss: 0.0014122086849089233
episode: 1428   score: 1.0   memory length: 227697   epsilon: 0.8400718000052654   global_step: 227697   average_q: 0.12183018276255403   average loss: 0.0015077729055684547
episode: 1429   score: 0.0   memory length: 227830   epsilon: 0.8399521000052693   global_step: 227830   average_q: 0.09233135578775764   average loss: 0.0017479931064960162
episode: 1430   score: 4.0   memory length: 228101   epsilon: 0.8397082000052773   global_step: 228101   average_q: 0.1449842552252361   average loss: 0.001763505515139
episode: 1431   score: 0.0   memory length: 228209   epsilon: 0.8396110000052805   global_step: 228209   average_q: 0.09363937101982257  

episode: 1474   score: 3.0   memory length: 235184   epsilon: 0.8333335000054872   global_step: 235184   average_q: 0.1924216633387765   average loss: 0.0016800573487924966
episode: 1475   score: 0.0   memory length: 235311   epsilon: 0.833219200005491   global_step: 235311   average_q: 0.08434329181909561   average loss: 0.0015156264999522974
episode: 1476   score: 1.0   memory length: 235481   epsilon: 0.833066200005496   global_step: 235481   average_q: 0.11353881819283261   average loss: 0.001267986898592961
episode: 1477   score: 1.0   memory length: 235608   epsilon: 0.8329519000054998   global_step: 235608   average_q: 0.09475018817374087   average loss: 0.0018542113601457875
episode: 1478   score: 0.0   memory length: 235717   epsilon: 0.832853800005503   global_step: 235717   average_q: 0.09541742667692517   average loss: 0.0015311481533524017
episode: 1479   score: 1.0   memory length: 235886   epsilon: 0.832701700005508   global_step: 235886   average_q: 0.10141276574787303 

episode: 1522   score: 1.0   memory length: 242643   epsilon: 0.8266204000057082   global_step: 242643   average_q: 0.17092856818011828   average loss: 0.0017193485428514896
episode: 1523   score: 0.0   memory length: 242763   epsilon: 0.8265124000057118   global_step: 242763   average_q: 0.08862090812375148   average loss: 0.001170459006910581
episode: 1524   score: 3.0   memory length: 242977   epsilon: 0.8263198000057181   global_step: 242977   average_q: 0.15848995491026718   average loss: 0.0018835865532785979
episode: 1525   score: 0.0   memory length: 243094   epsilon: 0.8262145000057216   global_step: 243094   average_q: 0.09526214977869621   average loss: 0.0015216863431329003
episode: 1526   score: 2.0   memory length: 243274   epsilon: 0.8260525000057269   global_step: 243274   average_q: 0.18509383201599122   average loss: 0.0015816333869022653
episode: 1527   score: 3.0   memory length: 243492   epsilon: 0.8258563000057334   global_step: 243492   average_q: 0.1780825559084

episode: 1570   score: 3.0   memory length: 250540   epsilon: 0.8195131000059422   global_step: 250540   average_q: 0.12776537740137428   average loss: 0.0017483068938710339
episode: 1571   score: 1.0   memory length: 250693   epsilon: 0.8193754000059468   global_step: 250693   average_q: 0.12255429919638665   average loss: 0.002061485840267124
episode: 1572   score: 1.0   memory length: 250866   epsilon: 0.8192197000059519   global_step: 250866   average_q: 0.11683542518257406   average loss: 0.0023380646750773707
episode: 1573   score: 0.0   memory length: 250986   epsilon: 0.8191117000059555   global_step: 250986   average_q: 0.0983448968268931   average loss: 0.0018534655602707062
episode: 1574   score: 2.0   memory length: 251188   epsilon: 0.8189299000059614   global_step: 251188   average_q: 0.1115350654293405   average loss: 0.001974299976852605
episode: 1575   score: 0.0   memory length: 251298   epsilon: 0.8188309000059647   global_step: 251298   average_q: 0.0960134032775055

episode: 1618   score: 1.0   memory length: 258866   epsilon: 0.812019700006189   global_step: 258866   average_q: 0.126150610968999   average loss: 0.0010070504247791346
episode: 1619   score: 1.0   memory length: 259038   epsilon: 0.811864900006194   global_step: 259038   average_q: 0.15947449679464795   average loss: 0.0013936325027993177
episode: 1620   score: 1.0   memory length: 259202   epsilon: 0.8117173000061989   global_step: 259202   average_q: 0.1586995341519757   average loss: 0.0015496459534072684
episode: 1621   score: 2.0   memory length: 259385   epsilon: 0.8115526000062043   global_step: 259385   average_q: 0.1681730030867897   average loss: 0.0011830483842951238
episode: 1622   score: 1.0   memory length: 259530   epsilon: 0.8114221000062086   global_step: 259530   average_q: 0.11546775634432661   average loss: 0.0016453729553553597
episode: 1623   score: 0.0   memory length: 259647   epsilon: 0.8113168000062121   global_step: 259647   average_q: 0.09318787232041359 

episode: 1666   score: 3.0   memory length: 266851   epsilon: 0.8048332000064256   global_step: 266851   average_q: 0.19283843632493772   average loss: 0.001035376624751274
episode: 1667   score: 3.0   memory length: 267065   epsilon: 0.8046406000064319   global_step: 267065   average_q: 0.18369247663383173   average loss: 0.0014434015888965005
episode: 1668   score: 1.0   memory length: 267228   epsilon: 0.8044939000064367   global_step: 267228   average_q: 0.11747663296713419   average loss: 0.0015589856762086196
episode: 1669   score: 2.0   memory length: 267429   epsilon: 0.8043130000064427   global_step: 267429   average_q: 0.22118651620413535   average loss: 0.0016779721557019065
episode: 1670   score: 0.0   memory length: 267546   epsilon: 0.8042077000064461   global_step: 267546   average_q: 0.09094831535322034   average loss: 0.0015169608856463789
episode: 1671   score: 1.0   memory length: 267696   epsilon: 0.8040727000064506   global_step: 267696   average_q: 0.1394169116268

episode: 1714   score: 0.0   memory length: 274783   epsilon: 0.7976944000066606   global_step: 274783   average_q: 0.09676896223644597   average loss: 0.0015259046411785775
episode: 1715   score: 2.0   memory length: 274992   epsilon: 0.7975063000066668   global_step: 274992   average_q: 0.10635702533014653   average loss: 0.001323634356046686
episode: 1716   score: 2.0   memory length: 275173   epsilon: 0.7973434000066721   global_step: 275173   average_q: 0.14449218008152687   average loss: 0.0015567921148918606
episode: 1717   score: 0.0   memory length: 275292   epsilon: 0.7972363000066757   global_step: 275292   average_q: 0.09707680566846824   average loss: 0.0009566775993678812
episode: 1718   score: 2.0   memory length: 275485   epsilon: 0.7970626000066814   global_step: 275485   average_q: 0.10745817081221028   average loss: 0.0015187095750426062
episode: 1719   score: 3.0   memory length: 275707   epsilon: 0.796862800006688   global_step: 275707   average_q: 0.13680557428380

episode: 1762   score: 1.0   memory length: 283118   epsilon: 0.7901929000069076   global_step: 283118   average_q: 0.1434024580680638   average loss: 0.0016791441047768504
episode: 1763   score: 2.0   memory length: 283278   epsilon: 0.7900489000069123   global_step: 283278   average_q: 0.18256756183691322   average loss: 0.0015217551679597818
episode: 1764   score: 2.0   memory length: 283481   epsilon: 0.7898662000069183   global_step: 283481   average_q: 0.17159065837269932   average loss: 0.0018856941246421287
episode: 1765   score: 1.0   memory length: 283656   epsilon: 0.7897087000069235   global_step: 283656   average_q: 0.17007173963955471   average loss: 0.001519160016240286
episode: 1766   score: 1.0   memory length: 283802   epsilon: 0.7895773000069278   global_step: 283802   average_q: 0.1419376702086158   average loss: 0.0012070038667357725
episode: 1767   score: 3.0   memory length: 284013   epsilon: 0.7893874000069341   global_step: 284013   average_q: 0.185801868631517

episode: 1810   score: 2.0   memory length: 291598   epsilon: 0.7825609000071588   global_step: 291598   average_q: 0.18946387778170667   average loss: 0.0015350607351518112
episode: 1811   score: 1.0   memory length: 291731   epsilon: 0.7824412000071628   global_step: 291731   average_q: 0.14339166002950274   average loss: 0.0018766929503377962
episode: 1812   score: 3.0   memory length: 291968   epsilon: 0.7822279000071698   global_step: 291968   average_q: 0.20367079215587947   average loss: 0.0014171411266060692
episode: 1813   score: 1.0   memory length: 292101   epsilon: 0.7821082000071737   global_step: 292101   average_q: 0.13854830731686793   average loss: 0.0017583690246933007
episode: 1814   score: 0.0   memory length: 292216   epsilon: 0.7820047000071771   global_step: 292216   average_q: 0.09849207874225534   average loss: 0.001047278576879762
episode: 1815   score: 4.0   memory length: 292513   epsilon: 0.781737400007186   global_step: 292513   average_q: 0.19473281206707

episode: 1858   score: 4.0   memory length: 300012   epsilon: 0.7749883000074081   global_step: 300012   average_q: 0.1866236783455323   average loss: 0.0012698452145920856
episode: 1859   score: 1.0   memory length: 300158   epsilon: 0.7748569000074125   global_step: 300158   average_q: 0.2055682223035048   average loss: 0.0017292783007564737
episode: 1860   score: 3.0   memory length: 300397   epsilon: 0.7746418000074196   global_step: 300397   average_q: 0.11725674125377603   average loss: 0.001948425290473127
episode: 1861   score: 2.0   memory length: 300574   epsilon: 0.7744825000074248   global_step: 300574   average_q: 0.22446991174908007   average loss: 0.0016773577982929434
episode: 1862   score: 0.0   memory length: 300688   epsilon: 0.7743799000074282   global_step: 300688   average_q: 0.09575539494031354   average loss: 0.0013651239484412752
episode: 1863   score: 2.0   memory length: 300861   epsilon: 0.7742242000074333   global_step: 300861   average_q: 0.201229512734564

episode: 1906   score: 1.0   memory length: 308551   epsilon: 0.7673032000076612   global_step: 308551   average_q: 0.13838058687317714   average loss: 0.0012221480216709527
episode: 1907   score: 2.0   memory length: 308731   epsilon: 0.7671412000076665   global_step: 308731   average_q: 0.20826487673653496   average loss: 0.0010276912138376954
episode: 1908   score: 0.0   memory length: 308841   epsilon: 0.7670422000076698   global_step: 308841   average_q: 0.09237836565483701   average loss: 0.0010486147527858107
episode: 1909   score: 2.0   memory length: 309039   epsilon: 0.7668640000076756   global_step: 309039   average_q: 0.2006365992316995   average loss: 0.0011215169458106782
episode: 1910   score: 0.0   memory length: 309156   epsilon: 0.7667587000076791   global_step: 309156   average_q: 0.0996456035436728   average loss: 0.0011670107825268179
episode: 1911   score: 2.0   memory length: 309360   epsilon: 0.7665751000076851   global_step: 309360   average_q: 0.20712802781924

episode: 1954   score: 1.0   memory length: 316604   epsilon: 0.7600555000078998   global_step: 316604   average_q: 0.2176657073199749   average loss: 0.0009884787239311014
episode: 1955   score: 3.0   memory length: 316824   epsilon: 0.7598575000079063   global_step: 316824   average_q: 0.24903659969568254   average loss: 0.0012660886092238468
episode: 1956   score: 0.0   memory length: 316950   epsilon: 0.75974410000791   global_step: 316950   average_q: 0.09560092645032066   average loss: 0.0013380833262335583
episode: 1957   score: 0.0   memory length: 317068   epsilon: 0.7596379000079135   global_step: 317068   average_q: 0.0989034913985406   average loss: 0.0015893439519721266
episode: 1958   score: 2.0   memory length: 317272   epsilon: 0.7594543000079196   global_step: 317272   average_q: 0.18710041897115753   average loss: 0.0016886708487850138
episode: 1959   score: 2.0   memory length: 317456   epsilon: 0.759288700007925   global_step: 317456   average_q: 0.17322051122217721

episode: 2002   score: 1.0   memory length: 324603   epsilon: 0.7528564000081368   global_step: 324603   average_q: 0.1607747479363924   average loss: 0.0015187861611443301
episode: 2003   score: 2.0   memory length: 324787   epsilon: 0.7526908000081423   global_step: 324787   average_q: 0.20838641826792256   average loss: 0.001336490010109391
episode: 2004   score: 2.0   memory length: 324987   epsilon: 0.7525108000081482   global_step: 324987   average_q: 0.13717365814372898   average loss: 0.0011086555238580332
episode: 2005   score: 1.0   memory length: 325129   epsilon: 0.7523830000081524   global_step: 325129   average_q: 0.1453725249624588   average loss: 0.0018129924194998776
episode: 2006   score: 3.0   memory length: 325365   epsilon: 0.7521706000081594   global_step: 325365   average_q: 0.20429102812846334   average loss: 0.0012291412856599998
episode: 2007   score: 4.0   memory length: 325622   epsilon: 0.751939300008167   global_step: 325622   average_q: 0.20167226344347  

episode: 2050   score: 6.0   memory length: 333604   epsilon: 0.7447555000084035   global_step: 333604   average_q: 0.12976875766867496   average loss: 0.0015416960819801533
episode: 2051   score: 2.0   memory length: 333807   epsilon: 0.7445728000084095   global_step: 333807   average_q: 0.1651121515854092   average loss: 0.0013233468772283195
episode: 2052   score: 3.0   memory length: 334042   epsilon: 0.7443613000084165   global_step: 334042   average_q: 0.12519874212114102   average loss: 0.0014325864428835129
episode: 2053   score: 2.0   memory length: 334240   epsilon: 0.7441831000084224   global_step: 334240   average_q: 0.13266693539164884   average loss: 0.0014808838081668424
episode: 2054   score: 2.0   memory length: 334428   epsilon: 0.7440139000084279   global_step: 334428   average_q: 0.20636394102760452   average loss: 0.001246270995812046
episode: 2055   score: 3.0   memory length: 334639   epsilon: 0.7438240000084342   global_step: 334639   average_q: 0.19959782275825

episode: 2098   score: 2.0   memory length: 341999   epsilon: 0.7372000000086523   global_step: 341999   average_q: 0.20873891284896268   average loss: 0.0016613497201534403
episode: 2099   score: 5.0   memory length: 342304   epsilon: 0.7369255000086613   global_step: 342304   average_q: 0.24161436686017473   average loss: 0.0016253041110042537
episode: 2100   score: 2.0   memory length: 342490   epsilon: 0.7367581000086668   global_step: 342490   average_q: 0.16219082713047023   average loss: 0.0015845847240510216
episode: 2101   score: 1.0   memory length: 342672   epsilon: 0.7365943000086722   global_step: 342672   average_q: 0.17597947597176164   average loss: 0.0015893012006907177
episode: 2102   score: 1.0   memory length: 342835   epsilon: 0.736447600008677   global_step: 342835   average_q: 0.19329794107770626   average loss: 0.0016487507885921907
episode: 2103   score: 0.0   memory length: 342963   epsilon: 0.7363324000086808   global_step: 342963   average_q: 0.0753071006038

episode: 2146   score: 1.0   memory length: 350696   epsilon: 0.72937270000891   global_step: 350696   average_q: 0.15977267714605914   average loss: 0.0016661692770213064
episode: 2147   score: 2.0   memory length: 350899   epsilon: 0.729190000008916   global_step: 350899   average_q: 0.14346682861117013   average loss: 0.00177809273632842
episode: 2148   score: 1.0   memory length: 351057   epsilon: 0.7290478000089207   global_step: 351057   average_q: 0.17933148397957976   average loss: 0.001753492726792605
episode: 2149   score: 1.0   memory length: 351192   epsilon: 0.7289263000089247   global_step: 351192   average_q: 0.1593066406229304   average loss: 0.0019006336024850262
episode: 2150   score: 1.0   memory length: 351327   epsilon: 0.7288048000089287   global_step: 351327   average_q: 0.13864012219839625   average loss: 0.0017960122805864862
episode: 2151   score: 1.0   memory length: 351481   epsilon: 0.7286662000089332   global_step: 351481   average_q: 0.1523764994624373   

episode: 2194   score: 2.0   memory length: 359731   epsilon: 0.7212412000091777   global_step: 359731   average_q: 0.18134100434075984   average loss: 0.0011513658483386633
episode: 2195   score: 2.0   memory length: 359908   epsilon: 0.7210819000091829   global_step: 359908   average_q: 0.24865920799600202   average loss: 0.00183155548845623
episode: 2196   score: 0.0   memory length: 360013   epsilon: 0.720987400009186   global_step: 360013   average_q: 0.11101208967821939   average loss: 0.0013029502635444736
episode: 2197   score: 2.0   memory length: 360202   epsilon: 0.7208173000091916   global_step: 360202   average_q: 0.21268630799931035   average loss: 0.0018951358875258526
episode: 2198   score: 0.0   memory length: 360326   epsilon: 0.7207057000091953   global_step: 360326   average_q: 0.1092747132023496   average loss: 0.0017564083256377584
episode: 2199   score: 1.0   memory length: 360455   epsilon: 0.7205896000091991   global_step: 360455   average_q: 0.1834479635509178

episode: 2242   score: 1.0   memory length: 368431   epsilon: 0.7134112000094355   global_step: 368431   average_q: 0.16824528470635414   average loss: 0.001206117276645576
episode: 2243   score: 1.0   memory length: 368573   epsilon: 0.7132834000094397   global_step: 368573   average_q: 0.17696417418216737   average loss: 0.0012260094633642388
episode: 2244   score: 5.0   memory length: 368894   epsilon: 0.7129945000094492   global_step: 368894   average_q: 0.2423855984740168   average loss: 0.001294710044984813
episode: 2245   score: 1.0   memory length: 369030   epsilon: 0.7128721000094532   global_step: 369030   average_q: 0.19311865093186498   average loss: 0.0011546701978873257
episode: 2246   score: 4.0   memory length: 369287   epsilon: 0.7126408000094608   global_step: 369287   average_q: 0.20018476668150972   average loss: 0.0016473198560603753
episode: 2247   score: 1.0   memory length: 369433   epsilon: 0.7125094000094652   global_step: 369433   average_q: 0.179404467931144

episode: 2290   score: 0.0   memory length: 378216   epsilon: 0.7046047000097254   global_step: 378216   average_q: 0.11720193693722326   average loss: 0.0014204047420923781
episode: 2291   score: 3.0   memory length: 378420   epsilon: 0.7044211000097315   global_step: 378420   average_q: 0.25593181540641713   average loss: 0.001564930259537148
episode: 2292   score: 2.0   memory length: 378589   epsilon: 0.7042690000097365   global_step: 378589   average_q: 0.23337722364702873   average loss: 0.001567310740766198
episode: 2293   score: 1.0   memory length: 378739   epsilon: 0.7041340000097409   global_step: 378739   average_q: 0.2189242657025655   average loss: 0.0012813297238123292
episode: 2294   score: 2.0   memory length: 378967   epsilon: 0.7039288000097477   global_step: 378967   average_q: 0.16079001651521316   average loss: 0.001676207458155675
episode: 2295   score: 1.0   memory length: 379119   epsilon: 0.7037920000097522   global_step: 379119   average_q: 0.1687340605758915

episode: 2338   score: 5.0   memory length: 388044   epsilon: 0.6957595000100166   global_step: 388044   average_q: 0.28900328080387827   average loss: 0.0015117914703012452
episode: 2339   score: 0.0   memory length: 388170   epsilon: 0.6956461000100204   global_step: 388170   average_q: 0.1101848931894416   average loss: 0.0014311626304882496
episode: 2340   score: 2.0   memory length: 388343   epsilon: 0.6954904000100255   global_step: 388343   average_q: 0.24334176886030015   average loss: 0.0012906826338616355
episode: 2341   score: 3.0   memory length: 388563   epsilon: 0.695292400010032   global_step: 388563   average_q: 0.3067216941240159   average loss: 0.0011597875008274887
episode: 2342   score: 1.0   memory length: 388699   epsilon: 0.695170000010036   global_step: 388699   average_q: 0.18533995070987763   average loss: 0.0013337481392655343
episode: 2343   score: 2.0   memory length: 388885   epsilon: 0.6950026000100415   global_step: 388885   average_q: 0.2548284142488433

episode: 2386   score: 5.0   memory length: 397353   epsilon: 0.6873814000102925   global_step: 397353   average_q: 0.23188323036412753   average loss: 0.0016664160160265358
episode: 2387   score: 1.0   memory length: 397482   epsilon: 0.6872653000102963   global_step: 397482   average_q: 0.23759514249341432   average loss: 0.001786596073395421
episode: 2388   score: 2.0   memory length: 397682   epsilon: 0.6870853000103022   global_step: 397682   average_q: 0.2706395445764065   average loss: 0.001587678577125189
episode: 2389   score: 1.0   memory length: 397828   epsilon: 0.6869539000103065   global_step: 397828   average_q: 0.1992816098595727   average loss: 0.0016340992078268805
episode: 2390   score: 1.0   memory length: 397994   epsilon: 0.6868045000103115   global_step: 397994   average_q: 0.130790283701506   average loss: 0.0016130433410493073
episode: 2391   score: 4.0   memory length: 398240   epsilon: 0.6865831000103187   global_step: 398240   average_q: 0.32517816983466225 

episode: 2434   score: 2.0   memory length: 400000   epsilon: 0.6786208000105809   global_step: 407087   average_q: 0.27780251202179107   average loss: 0.001498895306572385
episode: 2435   score: 2.0   memory length: 400000   epsilon: 0.6784435000105867   global_step: 407284   average_q: 0.24381935112367425   average loss: 0.0019599591016351025
episode: 2436   score: 2.0   memory length: 400000   epsilon: 0.6782770000105922   global_step: 407469   average_q: 0.2750474943150137   average loss: 0.0016395890780856732
episode: 2437   score: 1.0   memory length: 400000   epsilon: 0.678163600010596   global_step: 407595   average_q: 0.22430894711232258   average loss: 0.001743638664525601
episode: 2438   score: 5.0   memory length: 400000   epsilon: 0.6778981000106047   global_step: 407890   average_q: 0.2579803345567089   average loss: 0.0015513508656097702
episode: 2439   score: 3.0   memory length: 400000   epsilon: 0.6777019000106111   global_step: 408108   average_q: 0.2707976294390926 

episode: 2482   score: 2.0   memory length: 400000   epsilon: 0.6700699000108624   global_step: 416588   average_q: 0.23546692949082507   average loss: 0.0014123255664966756
episode: 2483   score: 3.0   memory length: 400000   epsilon: 0.6698611000108693   global_step: 416820   average_q: 0.2920999103868059   average loss: 0.0015453487234606592
episode: 2484   score: 2.0   memory length: 400000   epsilon: 0.6697081000108743   global_step: 416990   average_q: 0.26689853856120915   average loss: 0.0021029827810707504
episode: 2485   score: 5.0   memory length: 400000   epsilon: 0.6694336000108834   global_step: 417295   average_q: 0.2865467060784825   average loss: 0.0017191243191921443
episode: 2486   score: 5.0   memory length: 400000   epsilon: 0.6691636000108923   global_step: 417595   average_q: 0.3111435014009476   average loss: 0.0015563605014176573
episode: 2487   score: 2.0   memory length: 400000   epsilon: 0.6690115000108973   global_step: 417764   average_q: 0.279679044287585

episode: 2530   score: 0.0   memory length: 400000   epsilon: 0.6611185000111571   global_step: 426534   average_q: 0.1402305520011806   average loss: 0.0016987885139636038
episode: 2531   score: 3.0   memory length: 400000   epsilon: 0.6609241000111635   global_step: 426750   average_q: 0.2942449157126248   average loss: 0.0016333778963868368
episode: 2532   score: 4.0   memory length: 400000   epsilon: 0.6606721000111718   global_step: 427030   average_q: 0.3013868312950113   average loss: 0.001599423060231077
episode: 2533   score: 3.0   memory length: 400000   epsilon: 0.660454300011179   global_step: 427272   average_q: 0.21817355640694375   average loss: 0.00182562641302039
episode: 2534   score: 2.0   memory length: 400000   epsilon: 0.6603130000111836   global_step: 427429   average_q: 0.30417812931214927   average loss: 0.0018265567493593842
episode: 2535   score: 1.0   memory length: 400000   epsilon: 0.6601942000111876   global_step: 427561   average_q: 0.24314490911748374  

episode: 2578   score: 1.0   memory length: 400000   epsilon: 0.6523966000114443   global_step: 436225   average_q: 0.2995447470553663   average loss: 0.0014016802086562708
episode: 2579   score: 2.0   memory length: 400000   epsilon: 0.6522418000114494   global_step: 436397   average_q: 0.3167955790278177   average loss: 0.001691333543635136
episode: 2580   score: 3.0   memory length: 400000   epsilon: 0.6520645000114552   global_step: 436594   average_q: 0.35051035905625616   average loss: 0.0016815219076555979
episode: 2581   score: 1.0   memory length: 400000   epsilon: 0.6519349000114595   global_step: 436738   average_q: 0.3009960482724839   average loss: 0.00190798756981773
episode: 2582   score: 2.0   memory length: 400000   epsilon: 0.6517954000114641   global_step: 436893   average_q: 0.34002134807167517   average loss: 0.0019078738955525501
episode: 2583   score: 1.0   memory length: 400000   epsilon: 0.651675700011468   global_step: 437026   average_q: 0.2527571970638924   

episode: 2626   score: 2.0   memory length: 400000   epsilon: 0.6438601000117253   global_step: 445710   average_q: 0.32800803597970507   average loss: 0.0018846208146481522
episode: 2627   score: 6.0   memory length: 400000   epsilon: 0.6435424000117358   global_step: 446063   average_q: 0.32694232520560285   average loss: 0.0017751628659479584
episode: 2628   score: 0.0   memory length: 400000   epsilon: 0.6434551000117387   global_step: 446160   average_q: 0.20592085456418008   average loss: 0.002280907134451068
episode: 2629   score: 3.0   memory length: 400000   epsilon: 0.643264300011745   global_step: 446372   average_q: 0.3218721178237279   average loss: 0.0017754716048003964
episode: 2630   score: 3.0   memory length: 400000   epsilon: 0.6430834000117509   global_step: 446573   average_q: 0.3840800540438339   average loss: 0.001763882476964442
episode: 2631   score: 3.0   memory length: 400000   epsilon: 0.6428935000117572   global_step: 446784   average_q: 0.3469084213836498 

episode: 2674   score: 2.0   memory length: 400000   epsilon: 0.6353857000120043   global_step: 455126   average_q: 0.3641040606945047   average loss: 0.0018960184416837408
episode: 2675   score: 7.0   memory length: 400000   epsilon: 0.6350122000120166   global_step: 455541   average_q: 0.3373939306800624   average loss: 0.0017885812783350113
episode: 2676   score: 4.0   memory length: 400000   epsilon: 0.6347728000120245   global_step: 455807   average_q: 0.3435204939958744   average loss: 0.001741443216910978
episode: 2677   score: 3.0   memory length: 400000   epsilon: 0.6345982000120303   global_step: 456001   average_q: 0.3962806397184883   average loss: 0.0019140321486533533
episode: 2678   score: 0.0   memory length: 400000   epsilon: 0.6345127000120331   global_step: 456096   average_q: 0.18501727698664916   average loss: 0.002364253793769565
episode: 2679   score: 0.0   memory length: 400000   epsilon: 0.6344074000120365   global_step: 456213   average_q: 0.18393616626660028 

episode: 2722   score: 5.0   memory length: 400000   epsilon: 0.6260221000123126   global_step: 465530   average_q: 0.3470940265543522   average loss: 0.001906825527330048
episode: 2723   score: 3.0   memory length: 400000   epsilon: 0.6257944000123201   global_step: 465783   average_q: 0.35350153751288477   average loss: 0.002269444502305733
episode: 2724   score: 2.0   memory length: 400000   epsilon: 0.6256342000123254   global_step: 465961   average_q: 0.4026900381477696   average loss: 0.002475776608406796
episode: 2725   score: 2.0   memory length: 400000   epsilon: 0.6254857000123303   global_step: 466126   average_q: 0.36061345871650813   average loss: 0.002191448353661337
episode: 2726   score: 2.0   memory length: 400000   epsilon: 0.6253156000123359   global_step: 466315   average_q: 0.345297059566571   average loss: 0.0018844994822290366
episode: 2727   score: 5.0   memory length: 400000   epsilon: 0.6250537000123445   global_step: 466606   average_q: 0.388009687520794   av

episode: 2770   score: 2.0   memory length: 400000   epsilon: 0.6168241000126155   global_step: 475750   average_q: 0.40523340385998524   average loss: 0.0022986545188970883
episode: 2771   score: 3.0   memory length: 400000   epsilon: 0.6166315000126218   global_step: 475964   average_q: 0.42838376523735366   average loss: 0.002015163224798662
episode: 2772   score: 3.0   memory length: 400000   epsilon: 0.6164317000126284   global_step: 476186   average_q: 0.40228845006240915   average loss: 0.002152759487020179
episode: 2773   score: 4.0   memory length: 400000   epsilon: 0.6161878000126364   global_step: 476457   average_q: 0.4330405993833863   average loss: 0.002260887198497256
episode: 2774   score: 2.0   memory length: 400000   epsilon: 0.6160312000126416   global_step: 476631   average_q: 0.38840546279117977   average loss: 0.0021099410760457276
episode: 2775   score: 4.0   memory length: 400000   epsilon: 0.6157882000126496   global_step: 476901   average_q: 0.4428898145203237

episode: 2818   score: 2.0   memory length: 400000   epsilon: 0.6068566000129436   global_step: 486825   average_q: 0.36034229334405404   average loss: 0.0022015642366052805
episode: 2819   score: 4.0   memory length: 400000   epsilon: 0.6066244000129513   global_step: 487083   average_q: 0.4290913848654013   average loss: 0.002326992521326007
episode: 2820   score: 3.0   memory length: 400000   epsilon: 0.6064417000129573   global_step: 487286   average_q: 0.4801130488267159   average loss: 0.002444437395590885
episode: 2821   score: 3.0   memory length: 400000   epsilon: 0.606267100012963   global_step: 487480   average_q: 0.4390500341878109   average loss: 0.002165543106288563
episode: 2822   score: 3.0   memory length: 400000   epsilon: 0.6060664000129696   global_step: 487703   average_q: 0.4184519143716637   average loss: 0.00221299950042301
episode: 2823   score: 3.0   memory length: 400000   epsilon: 0.6058585000129765   global_step: 487934   average_q: 0.35935089819423566   av

episode: 2866   score: 1.0   memory length: 400000   epsilon: 0.5983867000132225   global_step: 496236   average_q: 0.35563339425842894   average loss: 0.0021598201800975226
episode: 2867   score: 1.0   memory length: 400000   epsilon: 0.5982769000132261   global_step: 496358   average_q: 0.3473713363627674   average loss: 0.0023068801259438767
episode: 2868   score: 8.0   memory length: 400000   epsilon: 0.5978809000132391   global_step: 496798   average_q: 0.5311007721518929   average loss: 0.0022780675582577134
episode: 2869   score: 4.0   memory length: 400000   epsilon: 0.5976262000132475   global_step: 497081   average_q: 0.40106161474167984   average loss: 0.002316122525371611
episode: 2870   score: 3.0   memory length: 400000   epsilon: 0.5974390000132537   global_step: 497289   average_q: 0.4297344352548512   average loss: 0.0023351457425283464
episode: 2871   score: 2.0   memory length: 400000   epsilon: 0.5972725000132592   global_step: 497474   average_q: 0.3928136692860642

episode: 2914   score: 4.0   memory length: 400000   epsilon: 0.5891230000135275   global_step: 506529   average_q: 0.4903248133513417   average loss: 0.0020165175377724713
episode: 2915   score: 6.0   memory length: 400000   epsilon: 0.5888278000135372   global_step: 506857   average_q: 0.4801775796946547   average loss: 0.00221313123318662
episode: 2916   score: 2.0   memory length: 400000   epsilon: 0.5886793000135421   global_step: 507022   average_q: 0.43336156228500783   average loss: 0.002541654729672397
episode: 2917   score: 7.0   memory length: 400000   epsilon: 0.588315700013554   global_step: 507426   average_q: 0.4820579801049858   average loss: 0.0022147168571908615
episode: 2918   score: 2.0   memory length: 400000   epsilon: 0.58813570001356   global_step: 507626   average_q: 0.4329496532678604   average loss: 0.002448145205125911
episode: 2919   score: 1.0   memory length: 400000   epsilon: 0.5880178000135639   global_step: 507757   average_q: 0.3939233003539666   aver

episode: 2962   score: 4.0   memory length: 400000   epsilon: 0.5798323000138333   global_step: 516852   average_q: 0.5301473053266942   average loss: 0.002488842163722315
episode: 2963   score: 4.0   memory length: 400000   epsilon: 0.5795974000138411   global_step: 517113   average_q: 0.5135146951823856   average loss: 0.002235119805985284
episode: 2964   score: 3.0   memory length: 400000   epsilon: 0.5793922000138478   global_step: 517341   average_q: 0.48399784127577095   average loss: 0.0024712248827869956
episode: 2965   score: 5.0   memory length: 400000   epsilon: 0.579114100013857   global_step: 517650   average_q: 0.46589603983950845   average loss: 0.002308328555083855
episode: 2966   score: 4.0   memory length: 400000   epsilon: 0.578869300013865   global_step: 517922   average_q: 0.479519398910377   average loss: 0.002501468494515661
episode: 2967   score: 4.0   memory length: 400000   epsilon: 0.5786488000138723   global_step: 518167   average_q: 0.4887051868499542   ave

episode: 3010   score: 2.0   memory length: 400000   epsilon: 0.5702545000141487   global_step: 527494   average_q: 0.46907045955290905   average loss: 0.0023512264100655827
episode: 3011   score: 6.0   memory length: 400000   epsilon: 0.569971000014158   global_step: 527809   average_q: 0.49210396400756307   average loss: 0.002363330381183279
episode: 3012   score: 2.0   memory length: 400000   epsilon: 0.5698162000141631   global_step: 527981   average_q: 0.4659589900700159   average loss: 0.002109192148477738
episode: 3013   score: 3.0   memory length: 400000   epsilon: 0.5696335000141691   global_step: 528184   average_q: 0.53583104709356   average loss: 0.002626764172719094
episode: 3014   score: 2.0   memory length: 400000   epsilon: 0.5694715000141745   global_step: 528364   average_q: 0.4519705242787798   average loss: 0.0022910855134897347
episode: 3015   score: 2.0   memory length: 400000   epsilon: 0.5693095000141798   global_step: 528544   average_q: 0.5080901246103976   av

episode: 3058   score: 1.0   memory length: 400000   epsilon: 0.5615857000144341   global_step: 537126   average_q: 0.4485527947847921   average loss: 0.002386918060985068
episode: 3059   score: 2.0   memory length: 400000   epsilon: 0.5614417000144388   global_step: 537286   average_q: 0.5334081914625131   average loss: 0.002601556133959093
episode: 3060   score: 2.0   memory length: 400000   epsilon: 0.5612950000144437   global_step: 537449   average_q: 0.4630739385022159   average loss: 0.0024794804384400556
episode: 3061   score: 4.0   memory length: 400000   epsilon: 0.5610601000144514   global_step: 537710   average_q: 0.5302209587575033   average loss: 0.0027199656547803675
episode: 3062   score: 3.0   memory length: 400000   epsilon: 0.5608351000144588   global_step: 537960   average_q: 0.502135883629322   average loss: 0.0024179440937004983
episode: 3063   score: 3.0   memory length: 400000   epsilon: 0.5606560000144647   global_step: 538159   average_q: 0.5161309889400724   a

episode: 3106   score: 5.0   memory length: 400000   epsilon: 0.5513014000147727   global_step: 548553   average_q: 0.5502453254401497   average loss: 0.0027483459011918713
episode: 3107   score: 3.0   memory length: 400000   epsilon: 0.5511133000147789   global_step: 548762   average_q: 0.5762488852883355   average loss: 0.0026706573001942352
episode: 3108   score: 6.0   memory length: 400000   epsilon: 0.5508100000147889   global_step: 549099   average_q: 0.5409435308607466   average loss: 0.002398108095307185
episode: 3109   score: 2.0   memory length: 400000   epsilon: 0.5506435000147943   global_step: 549284   average_q: 0.5247199172506462   average loss: 0.0022942856975164063
episode: 3110   score: 0.0   memory length: 400000   epsilon: 0.5505427000147977   global_step: 549396   average_q: 0.4015646356024912   average loss: 0.0022712994702617706
episode: 3111   score: 1.0   memory length: 400000   epsilon: 0.5504329000148013   global_step: 549518   average_q: 0.3962053151526412  

episode: 3154   score: 4.0   memory length: 400000   epsilon: 0.5412592000151033   global_step: 559711   average_q: 0.6229087035622327   average loss: 0.0028511217005284233
episode: 3155   score: 5.0   memory length: 400000   epsilon: 0.5409541000151133   global_step: 560050   average_q: 0.6004488191245931   average loss: 0.003125676046215564
episode: 3156   score: 6.0   memory length: 400000   epsilon: 0.5406589000151231   global_step: 560378   average_q: 0.5264505025271963   average loss: 0.003665552663555789
episode: 3157   score: 0.0   memory length: 400000   epsilon: 0.5405716000151259   global_step: 560475   average_q: 0.30991740079270197   average loss: 0.003301972522731563
episode: 3158   score: 7.0   memory length: 400000   epsilon: 0.5401792000151389   global_step: 560911   average_q: 0.6328247311533591   average loss: 0.0036991765755207337
episode: 3159   score: 2.0   memory length: 400000   epsilon: 0.540022600015144   global_step: 561085   average_q: 0.5060862033445944   a

episode: 3202   score: 3.0   memory length: 400000   epsilon: 0.531365500015429   global_step: 570704   average_q: 0.5503632954094145   average loss: 0.004031145240490635
episode: 3203   score: 3.0   memory length: 400000   epsilon: 0.531182800015435   global_step: 570907   average_q: 0.6133869239335576   average loss: 0.0037488323261300697
episode: 3204   score: 1.0   memory length: 400000   epsilon: 0.5310586000154391   global_step: 571045   average_q: 0.44770729768535367   average loss: 0.004049571647740685
episode: 3205   score: 2.0   memory length: 400000   epsilon: 0.5308939000154446   global_step: 571228   average_q: 0.5327161885187274   average loss: 0.003722864228282938
episode: 3206   score: 2.0   memory length: 400000   epsilon: 0.5307454000154495   global_step: 571393   average_q: 0.5206539606506174   average loss: 0.003815441970558216
episode: 3207   score: 5.0   memory length: 400000   epsilon: 0.5304412000154595   global_step: 571731   average_q: 0.6304356102671849   ave

episode: 3250   score: 4.0   memory length: 400000   epsilon: 0.5206609000157815   global_step: 582598   average_q: 0.6016198915875509   average loss: 0.003715056460595322
episode: 3251   score: 2.0   memory length: 400000   epsilon: 0.5205025000157867   global_step: 582774   average_q: 0.5156638079153543   average loss: 0.0037537094914031595
episode: 3252   score: 3.0   memory length: 400000   epsilon: 0.5203216000157926   global_step: 582975   average_q: 0.4969827873419173   average loss: 0.0040231724628780165
episode: 3253   score: 1.0   memory length: 400000   epsilon: 0.5201929000157969   global_step: 583118   average_q: 0.445605319019381   average loss: 0.004198285611020727
episode: 3254   score: 10.0   memory length: 400000   epsilon: 0.5197501000158115   global_step: 583610   average_q: 0.5824437598447975   average loss: 0.0038592211149227843
episode: 3255   score: 3.0   memory length: 400000   epsilon: 0.5195539000158179   global_step: 583828   average_q: 0.6480023215670104   

episode: 3298   score: 3.0   memory length: 400000   epsilon: 0.5104297000161183   global_step: 593966   average_q: 0.5052039096334958   average loss: 0.004267411984302439
episode: 3299   score: 4.0   memory length: 400000   epsilon: 0.5101984000161259   global_step: 594223   average_q: 0.4640710214913935   average loss: 0.004226473471283884
episode: 3300   score: 4.0   memory length: 400000   epsilon: 0.5099554000161339   global_step: 594493   average_q: 0.40462265033964756   average loss: 0.004256071801797522
episode: 3301   score: 2.0   memory length: 400000   epsilon: 0.5097934000161393   global_step: 594673   average_q: 0.5287286076694727   average loss: 0.003978520278987061
episode: 3302   score: 6.0   memory length: 400000   epsilon: 0.5094820000161495   global_step: 595019   average_q: 0.5728169915217884   average loss: 0.004161286250233241
episode: 3303   score: 6.0   memory length: 400000   epsilon: 0.5092066000161586   global_step: 595325   average_q: 0.5243252593463932   av

episode: 3346   score: 3.0   memory length: 400000   epsilon: 0.4989853000164325   global_step: 606682   average_q: 0.5362783341075886   average loss: 0.004211599698332562
episode: 3347   score: 3.0   memory length: 400000   epsilon: 0.49878640001642677   global_step: 606903   average_q: 0.6206664413095734   average loss: 0.003953816832203368
episode: 3348   score: 6.0   memory length: 400000   epsilon: 0.49848040001641797   global_step: 607243   average_q: 0.6837581015246756   average loss: 0.004060805330450153
episode: 3349   score: 9.0   memory length: 400000   epsilon: 0.4980844000164066   global_step: 607683   average_q: 0.555024043140425   average loss: 0.0041305523192435925
episode: 3350   score: 4.0   memory length: 400000   epsilon: 0.49785670001640003   global_step: 607936   average_q: 0.5910099621111226   average loss: 0.004563297624304051
episode: 3351   score: 6.0   memory length: 400000   epsilon: 0.4975678000163917   global_step: 608257   average_q: 0.41593908173876387  

episode: 3394   score: 7.0   memory length: 400000   epsilon: 0.48652390001607415   global_step: 620528   average_q: 0.5472627418669495   average loss: 0.004758652013323798
episode: 3395   score: 3.0   memory length: 400000   epsilon: 0.4863160000160682   global_step: 620759   average_q: 0.5856904904499198   average loss: 0.004584098370590557
episode: 3396   score: 2.0   memory length: 400000   epsilon: 0.4861639000160638   global_step: 620928   average_q: 0.6413033478535138   average loss: 0.005971150433495861
episode: 3397   score: 2.0   memory length: 400000   epsilon: 0.4860217000160597   global_step: 621086   average_q: 0.5431117263233548   average loss: 0.004961971700474431
episode: 3398   score: 8.0   memory length: 400000   epsilon: 0.4856590000160493   global_step: 621489   average_q: 0.619167247163155   average loss: 0.0051434652341954944
episode: 3399   score: 3.0   memory length: 400000   epsilon: 0.48542140001604245   global_step: 621753   average_q: 0.6673285011766534   a

episode: 3442   score: 4.0   memory length: 400000   epsilon: 0.474277600015722   global_step: 634135   average_q: 0.7597010140146675   average loss: 0.004815459928569657
episode: 3443   score: 6.0   memory length: 400000   epsilon: 0.47396710001571307   global_step: 634480   average_q: 0.5855019522533901   average loss: 0.004976265115892865
episode: 3444   score: 5.0   memory length: 400000   epsilon: 0.47370160001570544   global_step: 634775   average_q: 0.5379848286509514   average loss: 0.005334458442136519
episode: 3445   score: 7.0   memory length: 400000   epsilon: 0.473373100015696   global_step: 635140   average_q: 0.7864009350946505   average loss: 0.004816227917533929
episode: 3446   score: 5.0   memory length: 400000   epsilon: 0.4731274000156889   global_step: 635413   average_q: 0.5887851335872443   average loss: 0.004930111716267731
episode: 3447   score: 4.0   memory length: 400000   epsilon: 0.4729006000156824   global_step: 635665   average_q: 0.6278707193772471   ave

episode: 3490   score: 12.0   memory length: 400000   epsilon: 0.46201780001536946   global_step: 647757   average_q: 0.6962883157492095   average loss: 0.004830091118210648
episode: 3491   score: 6.0   memory length: 400000   epsilon: 0.4617136000153607   global_step: 648095   average_q: 0.7661986071506196   average loss: 0.005310932411977569
episode: 3492   score: 5.0   memory length: 400000   epsilon: 0.46143730001535277   global_step: 648402   average_q: 0.8016181742523703   average loss: 0.004896226259219955
episode: 3493   score: 8.0   memory length: 400000   epsilon: 0.4610656000153421   global_step: 648815   average_q: 0.7699618272429228   average loss: 0.004879678316531617
episode: 3494   score: 5.0   memory length: 400000   epsilon: 0.4608082000153347   global_step: 649101   average_q: 0.7700913688102802   average loss: 0.004939471579472684
episode: 3495   score: 7.0   memory length: 400000   epsilon: 0.46044280001532417   global_step: 649507   average_q: 0.6211416405703634  

episode: 3538   score: 3.0   memory length: 400000   epsilon: 0.4480993000149692   global_step: 663222   average_q: 0.5848291400981986   average loss: 0.004751705251249444
episode: 3539   score: 3.0   memory length: 400000   epsilon: 0.4479031000149636   global_step: 663440   average_q: 0.6916903578633562   average loss: 0.004496587080445243
episode: 3540   score: 3.0   memory length: 400000   epsilon: 0.44770780001495797   global_step: 663657   average_q: 0.6849313420222102   average loss: 0.004446953827459451
episode: 3541   score: 5.0   memory length: 400000   epsilon: 0.44746030001495085   global_step: 663932   average_q: 0.5963263010436838   average loss: 0.004529328950765458
episode: 3542   score: 7.0   memory length: 400000   epsilon: 0.44712640001494125   global_step: 664303   average_q: 0.6439367872524776   average loss: 0.004598203270490259
episode: 3543   score: 10.0   memory length: 400000   epsilon: 0.44666110001492787   global_step: 664820   average_q: 0.7464594802381452 

episode: 3586   score: 7.0   memory length: 400000   epsilon: 0.4348522000145883   global_step: 677941   average_q: 0.7507104533670963   average loss: 0.0043445424975937065
episode: 3587   score: 9.0   memory length: 400000   epsilon: 0.4344490000145767   global_step: 678389   average_q: 0.6191039202468736   average loss: 0.0044022772767026
episode: 3588   score: 5.0   memory length: 400000   epsilon: 0.43418350001456907   global_step: 678684   average_q: 0.7536806097475149   average loss: 0.004323410278707111
episode: 3589   score: 7.0   memory length: 400000   epsilon: 0.43382440001455874   global_step: 679083   average_q: 0.684750905610565   average loss: 0.004762460490633829
episode: 3590   score: 6.0   memory length: 400000   epsilon: 0.43352020001455   global_step: 679421   average_q: 0.6668262930928602   average loss: 0.004606916510276827
episode: 3591   score: 10.0   memory length: 400000   epsilon: 0.4330999000145379   global_step: 679888   average_q: 0.5978894107665954   aver

episode: 3634   score: 8.0   memory length: 400000   epsilon: 0.4203298000141707   global_step: 694077   average_q: 0.863464389636059   average loss: 0.005433981378581563
episode: 3635   score: 4.0   memory length: 400000   epsilon: 0.42010570001416425   global_step: 694326   average_q: 0.8464249159437586   average loss: 0.0046736284602811205
episode: 3636   score: 5.0   memory length: 400000   epsilon: 0.41984470001415675   global_step: 694616   average_q: 0.8833382821288602   average loss: 0.005219155352097005
episode: 3637   score: 3.0   memory length: 400000   epsilon: 0.41966020001415144   global_step: 694821   average_q: 0.8476093802510238   average loss: 0.005208385274064069
episode: 3638   score: 4.0   memory length: 400000   epsilon: 0.4194253000141447   global_step: 695082   average_q: 0.6535630368866683   average loss: 0.005499031348555381
episode: 3639   score: 7.0   memory length: 400000   epsilon: 0.41907250001413454   global_step: 695474   average_q: 0.6954887954100054  

episode: 3682   score: 6.0   memory length: 400000   epsilon: 0.4066534000137774   global_step: 709273   average_q: 0.6930569999747807   average loss: 0.0049806350450371465
episode: 3683   score: 4.0   memory length: 400000   epsilon: 0.40643560001377116   global_step: 709515   average_q: 0.7478797037985699   average loss: 0.005466390726799222
episode: 3684   score: 4.0   memory length: 400000   epsilon: 0.4061701000137635   global_step: 709810   average_q: 0.9350731413243181   average loss: 0.005054503721698045
episode: 3685   score: 2.0   memory length: 400000   epsilon: 0.4060189000137592   global_step: 709978   average_q: 0.7486894810128779   average loss: 0.0049529127940158586
episode: 3686   score: 8.0   memory length: 400000   epsilon: 0.4056517000137486   global_step: 710386   average_q: 0.7877211876797909   average loss: 0.005547408688917309
episode: 3687   score: 2.0   memory length: 400000   epsilon: 0.40550050001374427   global_step: 710554   average_q: 0.7529038542083332  

episode: 3730   score: 5.0   memory length: 400000   epsilon: 0.3937330000134059   global_step: 723629   average_q: 0.8683984405344183   average loss: 0.004833787664009089
episode: 3731   score: 7.0   memory length: 400000   epsilon: 0.39342160001339693   global_step: 723975   average_q: 0.8971436534486065   average loss: 0.005017482327711849
episode: 3732   score: 5.0   memory length: 400000   epsilon: 0.3931696000133897   global_step: 724255   average_q: 0.9480665447456497   average loss: 0.005180561533779837
episode: 3733   score: 3.0   memory length: 400000   epsilon: 0.3929797000133842   global_step: 724466   average_q: 0.8143021847117003   average loss: 0.005296763936410795
episode: 3734   score: 5.0   memory length: 400000   epsilon: 0.39273310001337713   global_step: 724740   average_q: 0.9211625886442018   average loss: 0.0055879019772256625
episode: 3735   score: 3.0   memory length: 400000   epsilon: 0.39256390001337227   global_step: 724928   average_q: 0.8901582196988957  

episode: 3778   score: 9.0   memory length: 400000   epsilon: 0.3806164000130287   global_step: 738203   average_q: 0.8802596500840797   average loss: 0.0046875399105102975
episode: 3779   score: 6.0   memory length: 400000   epsilon: 0.3803311000130205   global_step: 738520   average_q: 0.9789104600810102   average loss: 0.005050062122409546
episode: 3780   score: 9.0   memory length: 400000   epsilon: 0.3799135000130085   global_step: 738984   average_q: 0.9267024509608746   average loss: 0.004670692160038141
episode: 3781   score: 3.0   memory length: 400000   epsilon: 0.3796948000130022   global_step: 739227   average_q: 0.9488430154421692   average loss: 0.004971895069161759
episode: 3782   score: 3.0   memory length: 400000   epsilon: 0.37949860001299657   global_step: 739445   average_q: 0.8709862271853543   average loss: 0.005232977479538205
episode: 3783   score: 4.0   memory length: 400000   epsilon: 0.3792664000129899   global_step: 739703   average_q: 0.7602581959362178   a

episode: 3826   score: 4.0   memory length: 400000   epsilon: 0.3681559000126704   global_step: 752048   average_q: 0.8011109522680049   average loss: 0.004858881941206915
episode: 3827   score: 8.0   memory length: 400000   epsilon: 0.36780310001266026   global_step: 752440   average_q: 1.024438168321337   average loss: 0.004760313432097045
episode: 3828   score: 3.0   memory length: 400000   epsilon: 0.3676231000126551   global_step: 752640   average_q: 0.9398745432496071   average loss: 0.004723042124533094
episode: 3829   score: 3.0   memory length: 400000   epsilon: 0.36743410001264964   global_step: 752850   average_q: 0.8041626457657133   average loss: 0.004462418092935834
episode: 3830   score: 12.0   memory length: 400000   epsilon: 0.3669292000126351   global_step: 753411   average_q: 0.9725742596143495   average loss: 0.0048832977553763144
episode: 3831   score: 4.0   memory length: 400000   epsilon: 0.3666988000126285   global_step: 753667   average_q: 0.8576982090016827   

episode: 3874   score: 3.0   memory length: 400000   epsilon: 0.35606980001232286   global_step: 765477   average_q: 0.9280914533976934   average loss: 0.0051613631120054565
episode: 3875   score: 3.0   memory length: 400000   epsilon: 0.3558844000123175   global_step: 765683   average_q: 0.7749769459071668   average loss: 0.005225053737765105
episode: 3876   score: 3.0   memory length: 400000   epsilon: 0.3557053000123124   global_step: 765882   average_q: 0.8541905502578122   average loss: 0.004755792583586957
episode: 3877   score: 7.0   memory length: 400000   epsilon: 0.35540830001230383   global_step: 766212   average_q: 0.9369689955855861   average loss: 0.005313494489140188
episode: 3878   score: 3.0   memory length: 400000   epsilon: 0.35523100001229874   global_step: 766409   average_q: 0.9459142456502478   average loss: 0.005066721163853189
episode: 3879   score: 6.0   memory length: 400000   epsilon: 0.35495380001229077   global_step: 766717   average_q: 0.9650469931302132 

episode: 3922   score: 5.0   memory length: 400000   epsilon: 0.34378660001196965   global_step: 779125   average_q: 0.8493185133900322   average loss: 0.004488895963359124
episode: 3923   score: 2.0   memory length: 400000   epsilon: 0.34364440001196556   global_step: 779283   average_q: 0.8152566865275178   average loss: 0.005412041125273851
episode: 3924   score: 6.0   memory length: 400000   epsilon: 0.34334920001195707   global_step: 779611   average_q: 0.9518165954547685   average loss: 0.004943710250322844
episode: 3925   score: 6.0   memory length: 400000   epsilon: 0.34305670001194866   global_step: 779936   average_q: 0.8899076846012702   average loss: 0.004946718314089454
episode: 3926   score: 7.0   memory length: 400000   epsilon: 0.3427480000119398   global_step: 780279   average_q: 0.9244767724598809   average loss: 0.005304962331795679
episode: 3927   score: 9.0   memory length: 400000   epsilon: 0.3423349000119279   global_step: 780738   average_q: 0.9919578332397153  

episode: 3970   score: 10.0   memory length: 400000   epsilon: 0.3299788000115726   global_step: 794467   average_q: 0.9553719303889838   average loss: 0.004494198470995603
episode: 3971   score: 7.0   memory length: 400000   epsilon: 0.3296386000115628   global_step: 794845   average_q: 1.0039211498208778   average loss: 0.004527400700016952
episode: 3972   score: 4.0   memory length: 400000   epsilon: 0.32943160001155686   global_step: 795075   average_q: 0.8903547540954921   average loss: 0.004615126909060485
episode: 3973   score: 3.0   memory length: 400000   epsilon: 0.3292417000115514   global_step: 795286   average_q: 0.8019446428353187   average loss: 0.004333229190383053
episode: 3974   score: 8.0   memory length: 400000   epsilon: 0.3288520000115402   global_step: 795719   average_q: 0.9949692320465766   average loss: 0.004658753722615747
episode: 3975   score: 3.0   memory length: 400000   epsilon: 0.3286540000115345   global_step: 795939   average_q: 0.7190613137727434   a

episode: 4018   score: 5.0   memory length: 400000   epsilon: 0.3163366000111803   global_step: 809625   average_q: 0.8904862812710153   average loss: 0.004320645639723308
episode: 4019   score: 8.0   memory length: 400000   epsilon: 0.3159685000111697   global_step: 810034   average_q: 0.8410807396205539   average loss: 0.004489627354385554
episode: 4020   score: 5.0   memory length: 400000   epsilon: 0.3157138000111624   global_step: 810317   average_q: 0.814442840144828   average loss: 0.004490267705869969
episode: 4021   score: 6.0   memory length: 400000   epsilon: 0.31543480001115437   global_step: 810627   average_q: 0.7880185736763862   average loss: 0.004268035440022246
episode: 4022   score: 6.0   memory length: 400000   epsilon: 0.31514950001114617   global_step: 810944   average_q: 0.9355700402417769   average loss: 0.0046003576208710996
episode: 4023   score: 2.0   memory length: 400000   epsilon: 0.3150073000111421   global_step: 811102   average_q: 0.8109622086527981   a

episode: 4066   score: 11.0   memory length: 400000   epsilon: 0.30231280001077704   global_step: 825207   average_q: 0.7667500669133149   average loss: 0.004130029455692233
episode: 4067   score: 7.0   memory length: 400000   epsilon: 0.30195190001076666   global_step: 825608   average_q: 0.9251870183724715   average loss: 0.004020318943000354
episode: 4068   score: 3.0   memory length: 400000   epsilon: 0.3017647000107613   global_step: 825816   average_q: 0.7979174595899307   average loss: 0.0037855489849439785
episode: 4069   score: 4.0   memory length: 400000   epsilon: 0.3015082000107539   global_step: 826101   average_q: 0.9793471703403874   average loss: 0.004159517729810129
episode: 4070   score: 8.0   memory length: 400000   epsilon: 0.30116170001074394   global_step: 826486   average_q: 0.907546781719505   average loss: 0.0036759276729222243
episode: 4071   score: 4.0   memory length: 400000   epsilon: 0.30093220001073734   global_step: 826741   average_q: 0.9731757934186973

episode: 4114   score: 4.0   memory length: 400000   epsilon: 0.2890522000103957   global_step: 839941   average_q: 0.7092669132080945   average loss: 0.00407876296657761
episode: 4115   score: 2.0   memory length: 400000   epsilon: 0.2889055000103915   global_step: 840104   average_q: 0.8085999461405117   average loss: 0.003829749666034398
episode: 4116   score: 2.0   memory length: 400000   epsilon: 0.28876060001038734   global_step: 840265   average_q: 0.8205623463814303   average loss: 0.004106879063667903
episode: 4117   score: 6.0   memory length: 400000   epsilon: 0.2884537000103785   global_step: 840606   average_q: 0.8790835335282636   average loss: 0.0041739521216999885
episode: 4118   score: 11.0   memory length: 400000   epsilon: 0.28797490001036474   global_step: 841138   average_q: 0.9824897186424499   average loss: 0.0039836702300708295
episode: 4119   score: 4.0   memory length: 400000   epsilon: 0.287740000010358   global_step: 841399   average_q: 0.8988962211143011   

episode: 4162   score: 11.0   memory length: 400000   epsilon: 0.2736811000099537   global_step: 857020   average_q: 0.705667606185353   average loss: 0.003942195767382463
episode: 4163   score: 6.0   memory length: 400000   epsilon: 0.273412000009946   global_step: 857319   average_q: 0.9892826729195573   average loss: 0.0038490817316786004
episode: 4164   score: 9.0   memory length: 400000   epsilon: 0.2730241000099348   global_step: 857750   average_q: 0.9433606892755027   average loss: 0.003797235627082801
episode: 4165   score: 10.0   memory length: 400000   epsilon: 0.2725984000099226   global_step: 858223   average_q: 0.9045640328469791   average loss: 0.004052259138343269
episode: 4166   score: 8.0   memory length: 400000   epsilon: 0.27224920000991254   global_step: 858611   average_q: 0.9487559302258737   average loss: 0.004062209768309594
episode: 4167   score: 4.0   memory length: 400000   epsilon: 0.27201970000990594   global_step: 858866   average_q: 0.8496026975267074   

episode: 4210   score: 11.0   memory length: 400000   epsilon: 0.2590327000095325   global_step: 873296   average_q: 0.9923239178576712   average loss: 0.004299847623708232
episode: 4211   score: 5.0   memory length: 400000   epsilon: 0.25877350000952504   global_step: 873584   average_q: 0.8863440174609423   average loss: 0.004443628075831738
episode: 4212   score: 11.0   memory length: 400000   epsilon: 0.2582785000095108   global_step: 874134   average_q: 0.8677274952151559   average loss: 0.004230724546465684
episode: 4213   score: 5.0   memory length: 400000   epsilon: 0.25799770000950273   global_step: 874446   average_q: 0.8031938804838902   average loss: 0.004350072291391627
episode: 4214   score: 8.0   memory length: 400000   epsilon: 0.25765840000949297   global_step: 874823   average_q: 0.9658092269846869   average loss: 0.00393576857548044
episode: 4215   score: 3.0   memory length: 400000   epsilon: 0.25746940000948754   global_step: 875033   average_q: 0.8337291091680527 

episode: 4258   score: 8.0   memory length: 400000   epsilon: 0.24317290000928699   global_step: 890918   average_q: 0.9355437112444976   average loss: 0.004634337951121676
episode: 4259   score: 7.0   memory length: 400000   epsilon: 0.2428741000092876   global_step: 891250   average_q: 0.9381990942610315   average loss: 0.004208284065881969
episode: 4260   score: 6.0   memory length: 400000   epsilon: 0.24257260000928824   global_step: 891585   average_q: 1.0254663937127413   average loss: 0.004581556697515076
episode: 4261   score: 16.0   memory length: 400000   epsilon: 0.24191110000928961   global_step: 892320   average_q: 0.8763963586213638   average loss: 0.0039767727637536774
episode: 4262   score: 6.0   memory length: 400000   epsilon: 0.24162310000929021   global_step: 892640   average_q: 0.935360564943403   average loss: 0.004131355065146636
episode: 4263   score: 10.0   memory length: 400000   epsilon: 0.24118840000929112   global_step: 893123   average_q: 0.938964565718396

episode: 4306   score: 6.0   memory length: 400000   epsilon: 0.22543480000932395   global_step: 910627   average_q: 0.8402277224697172   average loss: 0.004765707560727606
episode: 4307   score: 10.0   memory length: 400000   epsilon: 0.2250226000093248   global_step: 911085   average_q: 0.8984941390534156   average loss: 0.004597044321118679
episode: 4308   score: 12.0   memory length: 400000   epsilon: 0.22457170000932575   global_step: 911586   average_q: 0.9886176884769203   average loss: 0.004443371494648283
episode: 4309   score: 10.0   memory length: 400000   epsilon: 0.2241109000093267   global_step: 912098   average_q: 0.8318630225840025   average loss: 0.004461523483541896
episode: 4310   score: 10.0   memory length: 400000   epsilon: 0.22367170000932762   global_step: 912586   average_q: 0.9599066352868666   average loss: 0.004220494680220193
episode: 4311   score: 6.0   memory length: 400000   epsilon: 0.22336030000932827   global_step: 912932   average_q: 1.04435086956603

episode: 4354   score: 12.0   memory length: 400000   epsilon: 0.20632690000936377   global_step: 931858   average_q: 0.8685920064600324   average loss: 0.004534806946923531
episode: 4355   score: 8.0   memory length: 400000   epsilon: 0.2059750000093645   global_step: 932249   average_q: 0.9899920458378999   average loss: 0.004497048951794996
episode: 4356   score: 5.0   memory length: 400000   epsilon: 0.20570320000936507   global_step: 932551   average_q: 1.0667263323700191   average loss: 0.004879045285537395
episode: 4357   score: 7.0   memory length: 400000   epsilon: 0.20536930000936576   global_step: 932922   average_q: 0.9122408180223963   average loss: 0.004784622063014325
episode: 4358   score: 6.0   memory length: 400000   epsilon: 0.2051092000093663   global_step: 933211   average_q: 0.7746190656633938   average loss: 0.004509794536476897
episode: 4359   score: 14.0   memory length: 400000   epsilon: 0.20461150000936734   global_step: 933764   average_q: 0.9057702081813519

episode: 4402   score: 8.0   memory length: 400000   epsilon: 0.18701110000940402   global_step: 953320   average_q: 0.9590670498212178   average loss: 0.004905911792845776
episode: 4403   score: 8.0   memory length: 400000   epsilon: 0.1866313000094048   global_step: 953742   average_q: 1.0738989860525627   average loss: 0.004724000649299402
episode: 4404   score: 12.0   memory length: 400000   epsilon: 0.18619480000940572   global_step: 954227   average_q: 0.8130244540492284   average loss: 0.0047661638344618845
episode: 4405   score: 13.0   memory length: 400000   epsilon: 0.1856782000094068   global_step: 954801   average_q: 1.0125281942638371   average loss: 0.004871891106913703
episode: 4406   score: 10.0   memory length: 400000   epsilon: 0.18525250000940768   global_step: 955274   average_q: 1.0112153051960544   average loss: 0.004657637580183687
episode: 4407   score: 8.0   memory length: 400000   epsilon: 0.1849042000094084   global_step: 955661   average_q: 1.038625904139930

episode: 4450   score: 8.0   memory length: 400000   epsilon: 0.16778620000944408   global_step: 974681   average_q: 0.9513613701203448   average loss: 0.004840504729957188
episode: 4451   score: 4.0   memory length: 400000   epsilon: 0.1675846000094445   global_step: 974905   average_q: 0.8890428337534624   average loss: 0.004866315858505134
episode: 4452   score: 8.0   memory length: 400000   epsilon: 0.1672426000094452   global_step: 975285   average_q: 1.1004562256367583   average loss: 0.004530730637213166
episode: 4453   score: 11.0   memory length: 400000   epsilon: 0.16679890000944614   global_step: 975778   average_q: 1.0152080171490296   average loss: 0.004353985359229178
episode: 4454   score: 11.0   memory length: 400000   epsilon: 0.16634440000944709   global_step: 976283   average_q: 1.1025406335249985   average loss: 0.0048044526226827785
episode: 4455   score: 9.0   memory length: 400000   epsilon: 0.165899800009448   global_step: 976777   average_q: 0.9993614994200618 

episode: 4498   score: 5.0   memory length: 400000   epsilon: 0.14854960000948417   global_step: 996055   average_q: 1.0345973944576987   average loss: 0.004495999016365662
episode: 4499   score: 8.0   memory length: 400000   epsilon: 0.14820760000948488   global_step: 996435   average_q: 1.11950386127359   average loss: 0.0046707113800374305
episode: 4500   score: 9.0   memory length: 400000   epsilon: 0.14783050000948567   global_step: 996854   average_q: 1.0402265703905738   average loss: 0.004503253901400659
episode: 4501   score: 10.0   memory length: 400000   epsilon: 0.14739490000948657   global_step: 997338   average_q: 1.0183085214254284   average loss: 0.004728358147895803
episode: 4502   score: 11.0   memory length: 400000   epsilon: 0.14692600000948755   global_step: 997859   average_q: 0.9116589582584184   average loss: 0.004638688849962733
episode: 4503   score: 14.0   memory length: 400000   epsilon: 0.14636620000948872   global_step: 998481   average_q: 0.88540172658357

episode: 4546   score: 12.0   memory length: 400000   epsilon: 0.12740860000952822   global_step: 1019545   average_q: 1.0576517963992025   average loss: 0.004981376001824926
episode: 4547   score: 11.0   memory length: 400000   epsilon: 0.1269919000095291   global_step: 1020008   average_q: 0.8990251878296582   average loss: 0.004704366359391026
episode: 4548   score: 15.0   memory length: 400000   epsilon: 0.12645100000953022   global_step: 1020609   average_q: 1.0606482277396514   average loss: 0.005137463730175712
episode: 4549   score: 8.0   memory length: 400000   epsilon: 0.1261279000095309   global_step: 1020968   average_q: 1.0002533168347765   average loss: 0.004861933527959246
episode: 4550   score: 14.0   memory length: 400000   epsilon: 0.12561580000953196   global_step: 1021537   average_q: 1.075709759927173   average loss: 0.00473267684063771
episode: 4551   score: 12.0   memory length: 400000   epsilon: 0.1251685000095329   global_step: 1022034   average_q: 1.0025324512

episode: 4593   score: 9.0   memory length: 400000   epsilon: 0.106878700009571   global_step: 1042356   average_q: 1.1905359986882944   average loss: 0.004887367473342097
episode: 4594   score: 12.0   memory length: 400000   epsilon: 0.10646470000957187   global_step: 1042816   average_q: 0.9971367295669473   average loss: 0.004873559269522641
episode: 4595   score: 4.0   memory length: 400000   epsilon: 0.10624330000957233   global_step: 1043062   average_q: 0.9963632333205967   average loss: 0.004830403186237
episode: 4596   score: 10.0   memory length: 400000   epsilon: 0.10585270000957314   global_step: 1043496   average_q: 1.15354008244754   average loss: 0.005069689595028929
episode: 4597   score: 8.0   memory length: 400000   epsilon: 0.10549360000957389   global_step: 1043895   average_q: 0.9670691171236205   average loss: 0.004290099229627431
episode: 4598   score: 6.0   memory length: 400000   epsilon: 0.1051993000095745   global_step: 1044222   average_q: 0.9704614839878286

episode: 4641   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1063858   average_q: 0.9144366709815879   average loss: 0.0048655122774371486
episode: 4642   score: 17.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1064517   average_q: 1.0477161401591641   average loss: 0.004669864828046835
episode: 4643   score: 8.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1064894   average_q: 1.0689815742899949   average loss: 0.0049700741297923606
episode: 4644   score: 15.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1065496   average_q: 0.983532011162403   average loss: 0.004960741255814488
episode: 4645   score: 11.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1065986   average_q: 1.063934303759312   average loss: 0.004703220027042743
episode: 4646   score: 9.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1066430   average_q: 1.132838

episode: 4688   score: 14.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1090382   average_q: 1.05621420601325   average loss: 0.005363421295221782
episode: 4689   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1090879   average_q: 1.014286204363019   average loss: 0.005135156331472361
episode: 4690   score: 11.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1091334   average_q: 1.1106576840301137   average loss: 0.005166915657029456
episode: 4691   score: 13.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1091966   average_q: 1.1118676851538918   average loss: 0.005217957991739746
episode: 4692   score: 13.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1092545   average_q: 1.0850482715586511   average loss: 0.0049035872959263226
episode: 4693   score: 9.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1093024   average_q: 1.1511619

episode: 4735   score: 10.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1115173   average_q: 1.1424871166141666   average loss: 0.0048845869131452805
episode: 4736   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1115688   average_q: 0.9719851016998291   average loss: 0.004874528326134839
episode: 4737   score: 14.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1116236   average_q: 0.9964029469466122   average loss: 0.00497203370718537
episode: 4738   score: 8.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1116615   average_q: 1.0597063130785105   average loss: 0.004890001046294371
episode: 4739   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1117169   average_q: 1.0490239045249856   average loss: 0.005341415256320664
episode: 4740   score: 10.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1117591   average_q: 1.10535

episode: 4782   score: 9.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1140063   average_q: 1.085103598116013   average loss: 0.005357475032696624
episode: 4783   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1140560   average_q: 1.1469571641992995   average loss: 0.004957518445574295
episode: 4784   score: 14.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1141125   average_q: 0.9396805451770799   average loss: 0.0046618662316113115
episode: 4785   score: 11.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1141648   average_q: 1.019902744296632   average loss: 0.004684714556148144
episode: 4786   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1142199   average_q: 1.052911755664379   average loss: 0.0050994909234081285
episode: 4787   score: 5.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1142489   average_q: 1.0118593

episode: 4829   score: 11.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1165100   average_q: 1.0779432906031114   average loss: 0.004925358796195625
episode: 4830   score: 10.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1165549   average_q: 1.1846150821593397   average loss: 0.004880730255495525
episode: 4831   score: 10.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1165982   average_q: 1.1592127429558003   average loss: 0.004549263758202004
episode: 4832   score: 6.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1166262   average_q: 1.0782019420393876   average loss: 0.004808195599928565
episode: 4833   score: 7.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1166643   average_q: 1.173248533814598   average loss: 0.0051732634620745895
episode: 4834   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1167184   average_q: 1.034675

episode: 4876   score: 13.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1192842   average_q: 1.0521634374841293   average loss: 0.004213383100995895
episode: 4877   score: 9.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1193306   average_q: 1.096677534474895   average loss: 0.004305968199583605
episode: 4878   score: 10.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1193789   average_q: 1.116758660129879   average loss: 0.004176521915788174
episode: 4879   score: 15.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1194352   average_q: 1.0514855713816773   average loss: 0.004295759353386387
episode: 4880   score: 19.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1195089   average_q: 1.0366559408534333   average loss: 0.004352597779675869
episode: 4881   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1195960   average_q: 0.9937729

episode: 4923   score: 20.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1220790   average_q: 0.8502597845311828   average loss: 0.004818397756911663
episode: 4924   score: 10.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1221312   average_q: 0.9881764643779203   average loss: 0.004745160235191064
episode: 4925   score: 18.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1222032   average_q: 1.015018800439106   average loss: 0.0046193523128749804
episode: 4926   score: 15.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1222672   average_q: 0.9788485667668283   average loss: 0.004974453148952307
episode: 4927   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1223502   average_q: 0.8927868010408907   average loss: 0.004792297637252788
episode: 4928   score: 11.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1223994   average_q: 1.1812

episode: 4970   score: 14.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1251290   average_q: 1.1340383432692078   average loss: 0.0044191124798397515
episode: 4971   score: 11.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1251776   average_q: 1.1536188104147773   average loss: 0.004509673003895868
episode: 4972   score: 16.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1252425   average_q: 0.9598599102904873   average loss: 0.004628475888550442
episode: 4973   score: 11.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1252947   average_q: 1.1708851861428484   average loss: 0.004471829439708749
episode: 4974   score: 13.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1253490   average_q: 0.9746706373509342   average loss: 0.004500860341697671
episode: 4975   score: 14.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1254111   average_q: 1.097

episode: 5017   score: 19.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1284238   average_q: 1.0593741180744811   average loss: 0.00507113177005909
episode: 5018   score: 15.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1284869   average_q: 1.0951312943797105   average loss: 0.0050668050057185365
episode: 5019   score: 19.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1285583   average_q: 1.1337566316252996   average loss: 0.005122586266112877
episode: 5020   score: 21.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1286405   average_q: 0.8929849100954051   average loss: 0.005035323691197474
episode: 5021   score: 14.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1286971   average_q: 1.1584526468397451   average loss: 0.0050354174684030546
episode: 5022   score: 21.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1287803   average_q: 1.035

episode: 5064   score: 19.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1319231   average_q: 1.039840371192318   average loss: 0.004612049565663281
episode: 5065   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1320096   average_q: 1.0026833136819002   average loss: 0.004631587275979513
episode: 5066   score: 26.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1321107   average_q: 1.0665178744068957   average loss: 0.004784864309079426
episode: 5067   score: 13.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1321674   average_q: 1.1692541084224375   average loss: 0.004495685123793314
episode: 5068   score: 16.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1322331   average_q: 1.0741456317030675   average loss: 0.004882752791786342
episode: 5069   score: 17.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1323064   average_q: 1.10374

episode: 5111   score: 24.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1354470   average_q: 1.1101404369114205   average loss: 0.004153737903054101
episode: 5112   score: 16.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1355144   average_q: 1.1394717267518228   average loss: 0.004314693075709062
episode: 5113   score: 16.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1355752   average_q: 1.1406362446417149   average loss: 0.004443009989412722
episode: 5114   score: 21.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1356605   average_q: 1.0933342108553208   average loss: 0.0042132192251331455
episode: 5115   score: 12.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1357166   average_q: 1.1752525667880733   average loss: 0.004096068912094422
episode: 5116   score: 16.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1357849   average_q: 1.116

episode: 5158   score: 30.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1388680   average_q: 1.0472811028012075   average loss: 0.004025746445222467
episode: 5159   score: 7.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1388981   average_q: 1.213777364686478   average loss: 0.0039899254428915855
episode: 5160   score: 15.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1389603   average_q: 1.1653863897373438   average loss: 0.003964782182941022
episode: 5161   score: 18.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1390359   average_q: 1.1822432232675728   average loss: 0.004103009897301457
episode: 5162   score: 24.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1391262   average_q: 1.1107530758065374   average loss: 0.0037665061306121736
episode: 5163   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1392087   average_q: 1.0453

episode: 5205   score: 18.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1424289   average_q: 1.0782079284792172   average loss: 0.004556993548706299
episode: 5206   score: 16.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1424936   average_q: 1.1975025901516216   average loss: 0.004343411922537602
episode: 5207   score: 24.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1425795   average_q: 1.0396200677811476   average loss: 0.004358322481186768
episode: 5208   score: 28.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1426791   average_q: 1.0991621561587814   average loss: 0.0042228365223948286
episode: 5209   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1427632   average_q: 1.1148373073190627   average loss: 0.004429496698428663
episode: 5210   score: 15.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1428266   average_q: 1.167

episode: 5252   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1461892   average_q: 1.0947438527795452   average loss: 0.0039424172178701995
episode: 5253   score: 19.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1462704   average_q: 1.0938344157755082   average loss: 0.004107477149069928
episode: 5254   score: 24.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1463658   average_q: 1.1630152782554146   average loss: 0.004027399449610196
episode: 5255   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1464501   average_q: 1.138362000067746   average loss: 0.004219992161968177
episode: 5256   score: 24.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1465398   average_q: 1.0874079460882422   average loss: 0.003974181630284242
episode: 5257   score: 21.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1466168   average_q: 1.0804

episode: 5299   score: 29.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1503795   average_q: 1.1842304976227447   average loss: 0.003946431234619105
episode: 5300   score: 29.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1504861   average_q: 1.156602218677246   average loss: 0.003942696945623828
episode: 5301   score: 17.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1505513   average_q: 1.3074445031080508   average loss: 0.0036633974986623663
episode: 5302   score: 21.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1506322   average_q: 1.2638262006165217   average loss: 0.003650269692715336
episode: 5303   score: 29.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1507518   average_q: 1.1847566189773904   average loss: 0.0038039836685841337
episode: 5304   score: 26.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1508499   average_q: 1.154

episode: 5346   score: 22.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1542198   average_q: 1.167226966957117   average loss: 0.004019278773436478
episode: 5347   score: 20.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1542910   average_q: 1.2451661973145236   average loss: 0.003911542023641313
episode: 5348   score: 15.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1543513   average_q: 1.1588671348284727   average loss: 0.004093476955429071
episode: 5349   score: 18.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1544200   average_q: 1.223057627721298   average loss: 0.0038242736626945143
episode: 5350   score: 29.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1545315   average_q: 1.0573009040590893   average loss: 0.003852784317644572
episode: 5351   score: 18.0   memory length: 400000   epsilon: 0.09999910000958534   global_step: 1545961   average_q: 1.30303