In [5]:
import os
import random
import numpy as np
import tensorflow as tf
from collections import deque

from nes_py.wrappers import JoypadSpace
import gym
import gym_tetris
from gym_tetris.actions import MOVEMENT

from skimage.color import rgb2gray
from skimage.transform import resize

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv2D, Dense, Flatten

In [6]:
# Neural network that takes a state as input and outputs one Q-value per action
class DQN(tf.keras.Model):
    """Convolutional Q-network: three Conv2D layers, a hidden Dense layer and a linear head.

    Args:
        action_size: number of units in the output layer (one Q-value per action).
        state_size: forwarded to the first convolution as ``input_shape``.
    """

    def __init__(self, action_size, state_size):
        super().__init__()
        # Feature extractor (layers are created in the same order they are applied).
        self.conv1 = Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4),
                            activation='relu', input_shape=state_size)
        self.conv2 = Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2),
                            activation='relu')
        self.conv3 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                            activation='relu')
        self.flatten = Flatten()
        # Fully connected part: hidden layer followed by the un-activated Q-value head.
        self.fc = Dense(units=512, activation='relu')
        self.fc_out = Dense(units=action_size)

    def call(self, x):
        """Run a forward pass and return the Q-values produced by ``fc_out``."""
        pipeline = (self.conv1, self.conv2, self.conv3, self.flatten, self.fc)
        for layer in pipeline:
            x = layer(x)
        return self.fc_out(x)

In [7]:
class Agent:
    """DQN agent: epsilon-greedy action selection, replay memory and a target network.

    Transitions are stored as ``(history, action, reward, next_history, done)`` tuples and
    sampled uniformly for training. ``history`` / ``next_history`` are stored un-normalised;
    they are divided by 255 right before being fed to the network.

    Args:
        action_size: number of discrete actions (size of the Q-value output).
        state_size: shape of a single network input, without the batch axis.
    """

    def __init__(self, action_size, state_size=(84, 84, 4)):
        self.render = False

        # Sizes of the state and of the action space
        self.state_size = state_size
        self.action_size = action_size

        # DQN hyperparameters
        self.discount_factor = 0.99
        self.learning_rate = 1e-4
        self.epsilon = 1.0
        self.epsilon_start = 1.0
        self.epsilon_end = 0.1
        self.exploration_steps = 1000000
        # Amount subtracted from epsilon on every training step (linear decay).
        self.epsilon_decay_step = self.epsilon_start - self.epsilon_end
        self.epsilon_decay_step /= self.exploration_steps
        self.batch_size = 32
        self.train_start = 50000
        self.update_target_rate = 10000

        # Replay memory, at most 100,000 transitions
        self.memory = deque(maxlen=100000)

#         # Option: stay idle for a random number of frames after the game starts
#         self.no_op_steps = 30

        # Create the online model and the target model
        self.model = DQN(action_size, state_size)
        self.target_model = DQN(action_size, state_size)
        self.optimizer = Adam(self.learning_rate, clipnorm=10.)

        # Subclassed Keras models create their variables lazily on the first call.
        # Run one dummy forward pass through both networks so that the weight copy
        # below really transfers the online weights instead of copying empty lists.
        dummy_input = tf.zeros((1,) + tuple(state_size), dtype=tf.float32)
        self.model(dummy_input)
        self.target_model(dummy_input)

        # Initialise the target model with the online model's weights
        self.update_target_model()

        # Per-episode accumulators; the training script resets them after logging.
        self.avg_q_max, self.avg_loss = 0, 0

        self.writer = tf.summary.create_file_writer('summary/Tetris_DQN')
        self.model_path = os.path.join(os.getcwd(), 'save_model', 'model')

    def update_target_model(self):
        """Copy the online model's weights into the target model."""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, history):
        """Pick an action with an epsilon-greedy policy.

        Args:
            history: un-normalised network input including the batch axis; it is
                divided by 255 before being passed to the model.

        Returns:
            A uniformly random action index with probability ``epsilon``, otherwise
            the argmax of the model's Q-values for the first batch element.
        """
        history = np.float32(history / 255.0)
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_value = self.model(history)
        return np.argmax(q_value[0])

    def append_sample(self, history, action, reward, next_history, done=None):
        """Store the transition <s, a, r, s', done> in the replay memory.

        Args:
            history: state before the action.
            action: index of the action taken.
            reward: reward received for the action.
            next_history: state after the action.
            done: whether ``next_history`` is terminal. Prefer passing it explicitly.
                When omitted, the flag is looked up as a module-level variable named
                ``done`` (what this method used to do implicitly) and defaults to
                ``False`` if no such variable exists, instead of raising NameError.
        """
        if done is None:
            # Legacy fallback for 4-argument callers that keep `done` as a global.
            done = globals().get('done', False)
        self.memory.append((history, action, reward, next_history, bool(done)))

    # Record training information to TensorBoard
    def draw_tensorboard(self, score, step, episode):
        """Write per-episode scalars (reward, mean max-Q, duration, mean loss).

        ``step`` is used as the divisor for the accumulated ``avg_q_max`` and
        ``avg_loss`` values, so it must be non-zero.
        """
        with self.writer.as_default():
            tf.summary.scalar('Total Reward/Episode', score, step=episode)
            tf.summary.scalar('Average Max Q/Episode',
                              self.avg_q_max / float(step), step=episode)
            tf.summary.scalar('Duration/Episode', step, step=episode)
            tf.summary.scalar('Average Loss/Episode',
                              self.avg_loss / float(step), step=episode)

    # Train the model on a batch sampled uniformly from the replay memory
    def train_model(self):
        """Run one gradient step on a random minibatch and decay epsilon.

        The replay memory must hold at least ``batch_size`` transitions.
        """
        # Linear epsilon decay, clamped so it never drops below epsilon_end.
        if self.epsilon > self.epsilon_end:
            self.epsilon = max(self.epsilon_end,
                               self.epsilon - self.epsilon_decay_step)

        # Sample batch_size transitions from memory
        batch = random.sample(self.memory, self.batch_size)

        # sample[0] / sample[3] carry a leading batch axis of size 1; [0] strips it.
        history = np.array([sample[0][0] / 255. for sample in batch],
                           dtype=np.float32)
        actions = np.array([sample[1] for sample in batch])
        rewards = np.array([sample[2] for sample in batch], dtype=np.float32)
        next_history = np.array([sample[3][0] / 255. for sample in batch],
                                dtype=np.float32)
        dones = np.array([sample[4] for sample in batch], dtype=np.float32)

        # Bellman targets from the target model. They are constants with respect to
        # the online parameters, so they are computed outside the gradient tape.
        # Terminal transitions must not bootstrap from the next state: (1 - done)
        # zeroes the discounted max-Q term for them.
        target_predicts = self.target_model(next_history)
        max_q = np.amax(target_predicts.numpy(), axis=1)
        targets = rewards + (1.0 - dones) * self.discount_factor * max_q

        # Trainable parameters
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            # Q-value of the action that was actually taken in each sampled state
            predicts = self.model(history)
            one_hot_action = tf.one_hot(actions, self.action_size)
            predicts = tf.reduce_sum(one_hot_action * predicts, axis=1)

            # Huber loss: quadratic for |error| <= 1, linear beyond that
            error = tf.abs(targets - predicts)
            quadratic_part = tf.clip_by_value(error, 0.0, 1.0)
            linear_part = error - quadratic_part
            loss = tf.reduce_mean(0.5 * tf.square(quadratic_part) + linear_part)

        self.avg_loss += loss.numpy()

        # Update the model in the direction that reduces the loss
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))


def pre_processing(observe):
    """Turn a raw frame into an 84x84 uint8 grayscale image.

    The frame is converted with ``rgb2gray``, resized to 84x84 (``mode='constant'``),
    scaled by 255 and cast to ``uint8``.
    """
    # RGB to GRAY
    gray_frame = rgb2gray(observe)
    small_frame = resize(gray_frame, (84, 84), mode='constant')
    return np.uint8(small_frame * 255)

In [8]:
if __name__ == "__main__":
    # Create the environment and the DQN agent
    env = gym_tetris.make('TetrisA-v0')
    env = JoypadSpace(env, MOVEMENT)
    # One discrete action per entry of the joypad action list handed to JoypadSpace.
    agent = Agent(action_size=len(MOVEMENT))

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs("./save_model", exist_ok=True)

    global_step = 0
    # Exponential moving average of episode scores; None until the first episode ends
    # (a numeric sentinel would restart the average whenever it is exactly 0).
    score_avg = None
    score_max = 0

    num_episode = 50000

    try:
        for e in range(num_episode):
            # NOTE: keep `done` a module-level name. Agent.append_sample is called
            # without a terminal flag below and picks it up from the notebook's
            # global namespace.
            done = False

            step, score = 0, 0
            # Reset the environment
            observe = env.reset()

#             # Stay idle for a randomly drawn number of frames
#             for _ in range(random.randint(1, agent.no_op_steps)):
#                 observe, _, _, _ = env.step(1)

            # Pre-process the frame and stack it four times to form the initial input.
            state = pre_processing(observe)
            history = np.stack((state, state, state, state), axis=2)
            history = np.reshape([history], (1, 84, 84, 4))

            while not done:
                if agent.render:
                    env.render()
                global_step += 1
                step += 1

                # Choose an action from the current history
                action = agent.get_action(history)

                # Advance the environment by one time step with the chosen action
                observe, reward, done, info = env.step(action)
                # Pre-process the new frame and push it to the front of the stack,
                # dropping the oldest of the four frames.
                next_state = pre_processing(observe)
                next_state = np.reshape([next_state], (1, 84, 84, 1))
                next_history = np.append(next_state, history[:, :, :, :3], axis=3)

                agent.avg_q_max += np.amax(agent.model(np.float32(history / 255.0))[0])

                # The raw reward goes into the score; the clipped one is used for learning.
                score += reward
                reward = np.clip(reward, -1., 1.)
                # Store the sample <s, a, r, s'> in the replay memory
                agent.append_sample(history, action, reward, next_history)

                # Start training once the replay memory holds enough transitions
                if len(agent.memory) >= agent.train_start:
                    agent.train_model()
                    # Periodically sync the target model with the online model
                    if global_step % agent.update_target_rate == 0:
                        agent.update_target_model()

                if done:
                    # Record per-episode training information
                    if global_step > agent.train_start:
                        agent.draw_tensorboard(int(score), step, e)

                    if score_avg is None:
                        score_avg = score
                    else:
                        score_avg = 0.9 * score_avg + 0.1 * score
                    score_max = max(score, score_max)

                    log = "episode: {:5d} | ".format(e)
                    log += "score: {:4.1f} | ".format(score)
                    log += "score max : {:4.1f} | ".format(score_max)
                    log += "score avg: {:4.1f} | ".format(score_avg)
                    log += "memory length: {:5d} | ".format(len(agent.memory))
                    log += "epsilon: {:.3f} | ".format(agent.epsilon)
                    log += "q avg : {:3.2f} | ".format(agent.avg_q_max / float(step))
                    log += "avg loss : {:3.2f}".format(agent.avg_loss / float(step))
                    print(log)

                    agent.avg_q_max, agent.avg_loss = 0, 0

            # Save the model every 1000 episodes
            if e % 1000 == 0:
                agent.model.save_weights("./save_model/model", save_format="tf")
    finally:
        # Release the emulator even when training is interrupted or fails.
        env.close()

episode:     0 | score:  1.0 | score max :  1.0 | score avg:  1.0 | memory length:  7824 | epsilon: 1.000 | q avg : 0.07 | avg loss : 0.00
episode:     1 | score:  2.0 | score max :  2.0 | score avg:  1.1 | memory length: 16007 | epsilon: 1.000 | q avg : 0.06 | avg loss : 0.00
episode:     2 | score:  0.0 | score max :  2.0 | score avg:  1.0 | memory length: 26362 | epsilon: 1.000 | q avg : 0.06 | avg loss : 0.00
episode:     3 | score:  0.0 | score max :  2.0 | score avg:  0.9 | memory length: 34708 | epsilon: 1.000 | q avg : 0.06 | avg loss : 0.00
episode:     4 | score:  1.0 | score max :  2.0 | score avg:  0.9 | memory length: 40938 | epsilon: 1.000 | q avg : 0.05 | avg loss : 0.00
episode:     5 | score:  0.0 | score max :  2.0 | score avg:  0.8 | memory length: 52043 | epsilon: 0.998 | q avg : 0.06 | avg loss : 0.00
episode:     6 | score:  0.0 | score max :  2.0 | score avg:  0.7 | memory length: 59239 | epsilon: 0.992 | q avg : 0.06 | avg loss : 0.00
episode:     7 | score:  0.

episode:    59 | score:  4.0 | score max : 44.0 | score avg:  4.2 | memory length: 100000 | epsilon: 0.654 | q avg : 0.10 | avg loss : 0.00
episode:    60 | score:  3.0 | score max : 44.0 | score avg:  4.1 | memory length: 100000 | epsilon: 0.648 | q avg : 0.10 | avg loss : 0.00
episode:    61 | score:  4.0 | score max : 44.0 | score avg:  4.1 | memory length: 100000 | epsilon: 0.644 | q avg : 0.10 | avg loss : 0.00
episode:    62 | score:  4.0 | score max : 44.0 | score avg:  4.1 | memory length: 100000 | epsilon: 0.640 | q avg : 0.10 | avg loss : 0.00
episode:    63 | score:  3.0 | score max : 44.0 | score avg:  4.0 | memory length: 100000 | epsilon: 0.636 | q avg : 0.10 | avg loss : 0.00
episode:    64 | score:  4.0 | score max : 44.0 | score avg:  4.0 | memory length: 100000 | epsilon: 0.632 | q avg : 0.10 | avg loss : 0.00
episode:    65 | score:  3.0 | score max : 44.0 | score avg:  3.9 | memory length: 100000 | epsilon: 0.627 | q avg : 0.10 | avg loss : 0.00
episode:    66 | sco

episode:   118 | score: 10.0 | score max : 53.0 | score avg: 14.7 | memory length: 100000 | epsilon: 0.445 | q avg : 0.17 | avg loss : 0.00
episode:   119 | score: 18.0 | score max : 53.0 | score avg: 15.1 | memory length: 100000 | epsilon: 0.442 | q avg : 0.16 | avg loss : 0.00
episode:   120 | score:  6.0 | score max : 53.0 | score avg: 14.2 | memory length: 100000 | epsilon: 0.439 | q avg : 0.17 | avg loss : 0.00
episode:   121 | score: 11.0 | score max : 53.0 | score avg: 13.8 | memory length: 100000 | epsilon: 0.437 | q avg : 0.17 | avg loss : 0.00
episode:   122 | score:  9.0 | score max : 53.0 | score avg: 13.4 | memory length: 100000 | epsilon: 0.436 | q avg : 0.17 | avg loss : 0.00
episode:   123 | score: 16.0 | score max : 53.0 | score avg: 13.6 | memory length: 100000 | epsilon: 0.434 | q avg : 0.17 | avg loss : 0.00
episode:   124 | score: 18.0 | score max : 53.0 | score avg: 14.1 | memory length: 100000 | epsilon: 0.431 | q avg : 0.18 | avg loss : 0.00
episode:   125 | sco

episode:   177 | score: 42.0 | score max : 53.0 | score avg: 20.6 | memory length: 100000 | epsilon: 0.337 | q avg : 0.25 | avg loss : 0.00
episode:   178 | score: 31.0 | score max : 53.0 | score avg: 21.7 | memory length: 100000 | epsilon: 0.335 | q avg : 0.25 | avg loss : 0.00
episode:   179 | score: 24.0 | score max : 53.0 | score avg: 21.9 | memory length: 100000 | epsilon: 0.334 | q avg : 0.25 | avg loss : 0.00
episode:   180 | score: 15.0 | score max : 53.0 | score avg: 21.2 | memory length: 100000 | epsilon: 0.332 | q avg : 0.25 | avg loss : 0.00
episode:   181 | score: 28.0 | score max : 53.0 | score avg: 21.9 | memory length: 100000 | epsilon: 0.330 | q avg : 0.25 | avg loss : 0.00
episode:   182 | score: 24.0 | score max : 53.0 | score avg: 22.1 | memory length: 100000 | epsilon: 0.328 | q avg : 0.26 | avg loss : 0.00
episode:   183 | score: 25.0 | score max : 53.0 | score avg: 22.4 | memory length: 100000 | epsilon: 0.327 | q avg : 0.25 | avg loss : 0.00
episode:   184 | sco

episode:   236 | score: 38.0 | score max : 80.0 | score avg: 28.8 | memory length: 100000 | epsilon: 0.248 | q avg : 0.31 | avg loss : 0.00
episode:   237 | score: 36.0 | score max : 80.0 | score avg: 29.5 | memory length: 100000 | epsilon: 0.247 | q avg : 0.32 | avg loss : 0.00
episode:   238 | score: 14.0 | score max : 80.0 | score avg: 28.0 | memory length: 100000 | epsilon: 0.246 | q avg : 0.32 | avg loss : 0.00
episode:   239 | score: 30.0 | score max : 80.0 | score avg: 28.2 | memory length: 100000 | epsilon: 0.245 | q avg : 0.32 | avg loss : 0.00
episode:   240 | score: 28.0 | score max : 80.0 | score avg: 28.2 | memory length: 100000 | epsilon: 0.244 | q avg : 0.32 | avg loss : 0.00
episode:   241 | score: 14.0 | score max : 80.0 | score avg: 26.7 | memory length: 100000 | epsilon: 0.242 | q avg : 0.32 | avg loss : 0.00
episode:   242 | score: 28.0 | score max : 80.0 | score avg: 26.9 | memory length: 100000 | epsilon: 0.241 | q avg : 0.33 | avg loss : 0.00
episode:   243 | sco

episode:   295 | score: 32.0 | score max : 80.0 | score avg: 33.2 | memory length: 100000 | epsilon: 0.184 | q avg : 0.38 | avg loss : 0.00
episode:   296 | score: 21.0 | score max : 80.0 | score avg: 32.0 | memory length: 100000 | epsilon: 0.183 | q avg : 0.38 | avg loss : 0.00
episode:   297 | score: 38.0 | score max : 80.0 | score avg: 32.6 | memory length: 100000 | epsilon: 0.182 | q avg : 0.38 | avg loss : 0.00
episode:   298 | score: 29.0 | score max : 80.0 | score avg: 32.2 | memory length: 100000 | epsilon: 0.181 | q avg : 0.40 | avg loss : 0.00
episode:   299 | score: 16.0 | score max : 80.0 | score avg: 30.6 | memory length: 100000 | epsilon: 0.179 | q avg : 0.39 | avg loss : 0.00
episode:   300 | score: 38.0 | score max : 80.0 | score avg: 31.4 | memory length: 100000 | epsilon: 0.178 | q avg : 0.39 | avg loss : 0.00
episode:   301 | score: 37.0 | score max : 80.0 | score avg: 31.9 | memory length: 100000 | epsilon: 0.177 | q avg : 0.39 | avg loss : 0.00
episode:   302 | sco

episode:   354 | score: 23.0 | score max : 80.0 | score avg: 40.5 | memory length: 100000 | epsilon: 0.127 | q avg : 0.44 | avg loss : 0.00
episode:   355 | score: 42.0 | score max : 80.0 | score avg: 40.7 | memory length: 100000 | epsilon: 0.126 | q avg : 0.45 | avg loss : 0.00
episode:   356 | score: 35.0 | score max : 80.0 | score avg: 40.1 | memory length: 100000 | epsilon: 0.126 | q avg : 0.45 | avg loss : 0.00
episode:   357 | score: 49.0 | score max : 80.0 | score avg: 41.0 | memory length: 100000 | epsilon: 0.125 | q avg : 0.46 | avg loss : 0.01
episode:   358 | score: 47.0 | score max : 80.0 | score avg: 41.6 | memory length: 100000 | epsilon: 0.124 | q avg : 0.46 | avg loss : 0.01
episode:   359 | score: 50.0 | score max : 80.0 | score avg: 42.4 | memory length: 100000 | epsilon: 0.124 | q avg : 0.46 | avg loss : 0.01
episode:   360 | score: 59.0 | score max : 80.0 | score avg: 44.1 | memory length: 100000 | epsilon: 0.123 | q avg : 0.46 | avg loss : 0.01
episode:   361 | sco

episode:   413 | score: 46.0 | score max : 80.0 | score avg: 47.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.52 | avg loss : 0.01
episode:   414 | score: 58.0 | score max : 80.0 | score avg: 48.6 | memory length: 100000 | epsilon: 0.100 | q avg : 0.52 | avg loss : 0.01
episode:   415 | score: 42.0 | score max : 80.0 | score avg: 47.9 | memory length: 100000 | epsilon: 0.100 | q avg : 0.52 | avg loss : 0.01
episode:   416 | score: 52.0 | score max : 80.0 | score avg: 48.3 | memory length: 100000 | epsilon: 0.100 | q avg : 0.52 | avg loss : 0.01
episode:   417 | score: 24.0 | score max : 80.0 | score avg: 45.9 | memory length: 100000 | epsilon: 0.100 | q avg : 0.52 | avg loss : 0.01
episode:   418 | score: 58.0 | score max : 80.0 | score avg: 47.1 | memory length: 100000 | epsilon: 0.100 | q avg : 0.52 | avg loss : 0.01
episode:   419 | score: 44.0 | score max : 80.0 | score avg: 46.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.52 | avg loss : 0.01
episode:   420 | sco

episode:   472 | score: 62.0 | score max : 80.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 0.58 | avg loss : 0.01
episode:   473 | score: 39.0 | score max : 80.0 | score avg: 49.7 | memory length: 100000 | epsilon: 0.100 | q avg : 0.59 | avg loss : 0.01
episode:   474 | score: 46.0 | score max : 80.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 0.58 | avg loss : 0.01
episode:   475 | score: 40.0 | score max : 80.0 | score avg: 48.4 | memory length: 100000 | epsilon: 0.100 | q avg : 0.58 | avg loss : 0.01
episode:   476 | score: 74.0 | score max : 80.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.58 | avg loss : 0.01
episode:   477 | score: 87.0 | score max : 87.0 | score avg: 54.6 | memory length: 100000 | epsilon: 0.100 | q avg : 0.59 | avg loss : 0.01
episode:   478 | score: 71.0 | score max : 87.0 | score avg: 56.2 | memory length: 100000 | epsilon: 0.100 | q avg : 0.59 | avg loss : 0.01
episode:   479 | sco

episode:   531 | score: 36.0 | score max : 88.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 0.65 | avg loss : 0.01
episode:   532 | score: 47.0 | score max : 88.0 | score avg: 49.2 | memory length: 100000 | epsilon: 0.100 | q avg : 0.65 | avg loss : 0.01
episode:   533 | score: 42.0 | score max : 88.0 | score avg: 48.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.65 | avg loss : 0.01
episode:   534 | score: 44.0 | score max : 88.0 | score avg: 48.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.65 | avg loss : 0.01
episode:   535 | score: 43.0 | score max : 88.0 | score avg: 47.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.65 | avg loss : 0.01
episode:   536 | score: 45.0 | score max : 88.0 | score avg: 47.3 | memory length: 100000 | epsilon: 0.100 | q avg : 0.65 | avg loss : 0.01
episode:   537 | score: 70.0 | score max : 88.0 | score avg: 49.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.65 | avg loss : 0.01
episode:   538 | sco

episode:   590 | score: 36.0 | score max : 88.0 | score avg: 52.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.70 | avg loss : 0.01
episode:   591 | score: 38.0 | score max : 88.0 | score avg: 51.3 | memory length: 100000 | epsilon: 0.100 | q avg : 0.72 | avg loss : 0.01
episode:   592 | score: 25.0 | score max : 88.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 0.72 | avg loss : 0.01
episode:   593 | score: 42.0 | score max : 88.0 | score avg: 48.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.72 | avg loss : 0.01
episode:   594 | score: 65.0 | score max : 88.0 | score avg: 49.7 | memory length: 100000 | epsilon: 0.100 | q avg : 0.72 | avg loss : 0.01
episode:   595 | score: 90.0 | score max : 90.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 0.72 | avg loss : 0.01
episode:   596 | score: 65.0 | score max : 90.0 | score avg: 54.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.72 | avg loss : 0.01
episode:   597 | sco

episode:   649 | score: 39.0 | score max : 90.0 | score avg: 48.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.77 | avg loss : 0.01
episode:   650 | score: 56.0 | score max : 90.0 | score avg: 48.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.77 | avg loss : 0.01
episode:   651 | score: 43.0 | score max : 90.0 | score avg: 48.2 | memory length: 100000 | epsilon: 0.100 | q avg : 0.77 | avg loss : 0.01
episode:   652 | score: 41.0 | score max : 90.0 | score avg: 47.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.77 | avg loss : 0.01
episode:   653 | score: 60.0 | score max : 90.0 | score avg: 48.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.77 | avg loss : 0.01
episode:   654 | score: 61.0 | score max : 90.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.77 | avg loss : 0.01
episode:   655 | score: 45.0 | score max : 90.0 | score avg: 49.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.78 | avg loss : 0.01
episode:   656 | sco

episode:   708 | score: 34.0 | score max : 90.0 | score avg: 44.2 | memory length: 100000 | epsilon: 0.100 | q avg : 0.82 | avg loss : 0.01
episode:   709 | score: 51.0 | score max : 90.0 | score avg: 44.9 | memory length: 100000 | epsilon: 0.100 | q avg : 0.83 | avg loss : 0.01
episode:   710 | score: 57.0 | score max : 90.0 | score avg: 46.1 | memory length: 100000 | epsilon: 0.100 | q avg : 0.84 | avg loss : 0.01
episode:   711 | score: 45.0 | score max : 90.0 | score avg: 46.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.83 | avg loss : 0.01
episode:   712 | score: 35.0 | score max : 90.0 | score avg: 44.9 | memory length: 100000 | epsilon: 0.100 | q avg : 0.84 | avg loss : 0.01
episode:   713 | score: 41.0 | score max : 90.0 | score avg: 44.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.84 | avg loss : 0.01
episode:   714 | score: 65.0 | score max : 90.0 | score avg: 46.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.84 | avg loss : 0.01
episode:   715 | sco

episode:   767 | score: 48.0 | score max : 105.0 | score avg: 55.2 | memory length: 100000 | epsilon: 0.100 | q avg : 0.88 | avg loss : 0.01
episode:   768 | score: 47.0 | score max : 105.0 | score avg: 54.4 | memory length: 100000 | epsilon: 0.100 | q avg : 0.89 | avg loss : 0.01
episode:   769 | score: 44.0 | score max : 105.0 | score avg: 53.3 | memory length: 100000 | epsilon: 0.100 | q avg : 0.89 | avg loss : 0.01
episode:   770 | score: 41.0 | score max : 105.0 | score avg: 52.1 | memory length: 100000 | epsilon: 0.100 | q avg : 0.89 | avg loss : 0.01
episode:   771 | score: 45.0 | score max : 105.0 | score avg: 51.4 | memory length: 100000 | epsilon: 0.100 | q avg : 0.89 | avg loss : 0.01
episode:   772 | score: 40.0 | score max : 105.0 | score avg: 50.3 | memory length: 100000 | epsilon: 0.100 | q avg : 0.89 | avg loss : 0.01
episode:   773 | score: 74.0 | score max : 105.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 0.89 | avg loss : 0.01
episode:   77

episode:   826 | score: 75.0 | score max : 105.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.94 | avg loss : 0.01
episode:   827 | score: 44.0 | score max : 105.0 | score avg: 50.3 | memory length: 100000 | epsilon: 0.100 | q avg : 0.94 | avg loss : 0.01
episode:   828 | score: 70.0 | score max : 105.0 | score avg: 52.2 | memory length: 100000 | epsilon: 0.100 | q avg : 0.94 | avg loss : 0.01
episode:   829 | score: 90.0 | score max : 105.0 | score avg: 56.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.94 | avg loss : 0.01
episode:   830 | score: 60.0 | score max : 105.0 | score avg: 56.4 | memory length: 100000 | epsilon: 0.100 | q avg : 0.93 | avg loss : 0.01
episode:   831 | score: 27.0 | score max : 105.0 | score avg: 53.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.94 | avg loss : 0.01
episode:   832 | score: 68.0 | score max : 105.0 | score avg: 54.9 | memory length: 100000 | epsilon: 0.100 | q avg : 0.94 | avg loss : 0.01
episode:   83

episode:   885 | score: 46.0 | score max : 105.0 | score avg: 49.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.99 | avg loss : 0.01
episode:   886 | score: 42.0 | score max : 105.0 | score avg: 49.0 | memory length: 100000 | epsilon: 0.100 | q avg : 0.99 | avg loss : 0.01
episode:   887 | score: 44.0 | score max : 105.0 | score avg: 48.5 | memory length: 100000 | epsilon: 0.100 | q avg : 0.99 | avg loss : 0.01
episode:   888 | score: 41.0 | score max : 105.0 | score avg: 47.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.99 | avg loss : 0.01
episode:   889 | score: 48.0 | score max : 105.0 | score avg: 47.8 | memory length: 100000 | epsilon: 0.100 | q avg : 0.99 | avg loss : 0.01
episode:   890 | score: 43.0 | score max : 105.0 | score avg: 47.3 | memory length: 100000 | epsilon: 0.100 | q avg : 0.99 | avg loss : 0.01
episode:   891 | score: 41.0 | score max : 105.0 | score avg: 46.7 | memory length: 100000 | epsilon: 0.100 | q avg : 0.99 | avg loss : 0.01
episode:   89

episode:   944 | score: 90.0 | score max : 105.0 | score avg: 57.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.04 | avg loss : 0.01
episode:   945 | score: 72.0 | score max : 105.0 | score avg: 58.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.03 | avg loss : 0.01
episode:   946 | score: 46.0 | score max : 105.0 | score avg: 57.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.03 | avg loss : 0.01
episode:   947 | score: 84.0 | score max : 105.0 | score avg: 60.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.04 | avg loss : 0.01
episode:   948 | score: 46.0 | score max : 105.0 | score avg: 58.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.04 | avg loss : 0.01
episode:   949 | score: 99.0 | score max : 105.0 | score avg: 62.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.05 | avg loss : 0.01
episode:   950 | score: 34.0 | score max : 105.0 | score avg: 59.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.05 | avg loss : 0.01
episode:   95

episode:  1003 | score: 81.0 | score max : 105.0 | score avg: 50.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.11 | avg loss : 0.01
episode:  1004 | score: 53.0 | score max : 105.0 | score avg: 50.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.11 | avg loss : 0.01
episode:  1005 | score: 37.0 | score max : 105.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.11 | avg loss : 0.01
episode:  1006 | score: 52.0 | score max : 105.0 | score avg: 49.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.11 | avg loss : 0.01
episode:  1007 | score: 89.0 | score max : 105.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.11 | avg loss : 0.01
episode:  1008 | score: 73.0 | score max : 105.0 | score avg: 55.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.11 | avg loss : 0.01
episode:  1009 | score: 26.0 | score max : 105.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.12 | avg loss : 0.01
episode:  101

episode:  1062 | score: 45.0 | score max : 105.0 | score avg: 45.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.17 | avg loss : 0.01
episode:  1063 | score: 66.0 | score max : 105.0 | score avg: 47.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.16 | avg loss : 0.01
episode:  1064 | score: 51.0 | score max : 105.0 | score avg: 48.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.17 | avg loss : 0.01
episode:  1065 | score: 46.0 | score max : 105.0 | score avg: 47.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.16 | avg loss : 0.01
episode:  1066 | score: 35.0 | score max : 105.0 | score avg: 46.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.17 | avg loss : 0.01
episode:  1067 | score: 34.0 | score max : 105.0 | score avg: 45.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.16 | avg loss : 0.01
episode:  1068 | score: 53.0 | score max : 105.0 | score avg: 46.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.16 | avg loss : 0.01
episode:  106

episode:  1121 | score: 39.0 | score max : 105.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.21 | avg loss : 0.01
episode:  1122 | score: 49.0 | score max : 105.0 | score avg: 49.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.21 | avg loss : 0.01
episode:  1123 | score: 55.0 | score max : 105.0 | score avg: 49.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.21 | avg loss : 0.01
episode:  1124 | score: 36.0 | score max : 105.0 | score avg: 48.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.21 | avg loss : 0.01
episode:  1125 | score: 63.0 | score max : 105.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.21 | avg loss : 0.01
episode:  1126 | score: 76.0 | score max : 105.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.21 | avg loss : 0.01
episode:  1127 | score: 21.0 | score max : 105.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.22 | avg loss : 0.01
episode:  112

episode:  1180 | score: 42.0 | score max : 105.0 | score avg: 49.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.24 | avg loss : 0.01
episode:  1181 | score: 33.0 | score max : 105.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.24 | avg loss : 0.01
episode:  1182 | score: 48.0 | score max : 105.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.24 | avg loss : 0.01
episode:  1183 | score: 36.0 | score max : 105.0 | score avg: 46.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.24 | avg loss : 0.01
episode:  1184 | score: 44.0 | score max : 105.0 | score avg: 46.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.24 | avg loss : 0.01
episode:  1185 | score: 78.0 | score max : 105.0 | score avg: 49.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.24 | avg loss : 0.01
episode:  1186 | score: 56.0 | score max : 105.0 | score avg: 50.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.24 | avg loss : 0.01
episode:  118

episode:  1239 | score: 45.0 | score max : 105.0 | score avg: 57.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.26 | avg loss : 0.01
episode:  1240 | score: 27.0 | score max : 105.0 | score avg: 54.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.26 | avg loss : 0.01
episode:  1241 | score: 46.0 | score max : 105.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.26 | avg loss : 0.01
episode:  1242 | score: 69.0 | score max : 105.0 | score avg: 55.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.26 | avg loss : 0.01
episode:  1243 | score: 44.0 | score max : 105.0 | score avg: 54.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.26 | avg loss : 0.01
episode:  1244 | score: 69.0 | score max : 105.0 | score avg: 55.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.26 | avg loss : 0.01
episode:  1245 | score: 72.0 | score max : 105.0 | score avg: 57.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.26 | avg loss : 0.01
episode:  124

episode:  1298 | score: 42.0 | score max : 105.0 | score avg: 57.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.28 | avg loss : 0.01
episode:  1299 | score: 52.0 | score max : 105.0 | score avg: 56.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.29 | avg loss : 0.01
episode:  1300 | score: 46.0 | score max : 105.0 | score avg: 55.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.29 | avg loss : 0.01
episode:  1301 | score: 43.0 | score max : 105.0 | score avg: 54.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.28 | avg loss : 0.01
episode:  1302 | score: 71.0 | score max : 105.0 | score avg: 56.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.28 | avg loss : 0.01
episode:  1303 | score: 65.0 | score max : 105.0 | score avg: 57.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.29 | avg loss : 0.01
episode:  1304 | score: 78.0 | score max : 105.0 | score avg: 59.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.29 | avg loss : 0.01
episode:  130

episode:  1357 | score: 50.0 | score max : 105.0 | score avg: 52.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.34 | avg loss : 0.01
episode:  1358 | score: 57.0 | score max : 105.0 | score avg: 52.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.34 | avg loss : 0.01
episode:  1359 | score: 76.0 | score max : 105.0 | score avg: 55.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.34 | avg loss : 0.01
episode:  1360 | score: 44.0 | score max : 105.0 | score avg: 54.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.33 | avg loss : 0.01
episode:  1361 | score: 60.0 | score max : 105.0 | score avg: 54.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.33 | avg loss : 0.01
episode:  1362 | score: 81.0 | score max : 105.0 | score avg: 57.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.34 | avg loss : 0.01
episode:  1363 | score: 61.0 | score max : 105.0 | score avg: 57.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.34 | avg loss : 0.01
episode:  136

episode:  1416 | score: 75.0 | score max : 105.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.38 | avg loss : 0.01
episode:  1417 | score: 44.0 | score max : 105.0 | score avg: 50.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.38 | avg loss : 0.00
episode:  1418 | score: 52.0 | score max : 105.0 | score avg: 50.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.38 | avg loss : 0.01
episode:  1419 | score: 69.0 | score max : 105.0 | score avg: 52.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.38 | avg loss : 0.01
episode:  1420 | score: 27.0 | score max : 105.0 | score avg: 49.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.38 | avg loss : 0.01
episode:  1421 | score: 39.0 | score max : 105.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.38 | avg loss : 0.01
episode:  1422 | score: 38.0 | score max : 105.0 | score avg: 47.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.38 | avg loss : 0.01
episode:  142

episode:  1475 | score: 54.0 | score max : 105.0 | score avg: 58.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.41 | avg loss : 0.01
episode:  1476 | score: 62.0 | score max : 105.0 | score avg: 58.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.41 | avg loss : 0.01
episode:  1477 | score: 84.0 | score max : 105.0 | score avg: 61.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.41 | avg loss : 0.00
episode:  1478 | score: 51.0 | score max : 105.0 | score avg: 60.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.41 | avg loss : 0.01
episode:  1479 | score: 48.0 | score max : 105.0 | score avg: 59.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.41 | avg loss : 0.00
episode:  1480 | score: 34.0 | score max : 105.0 | score avg: 56.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.41 | avg loss : 0.01
episode:  1481 | score: 60.0 | score max : 105.0 | score avg: 56.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.41 | avg loss : 0.01
episode:  148

episode:  1534 | score: 54.0 | score max : 105.0 | score avg: 57.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.43 | avg loss : 0.01
episode:  1535 | score: 51.0 | score max : 105.0 | score avg: 57.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.43 | avg loss : 0.01
episode:  1536 | score: 66.0 | score max : 105.0 | score avg: 58.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.43 | avg loss : 0.01
episode:  1537 | score: 49.0 | score max : 105.0 | score avg: 57.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.43 | avg loss : 0.00
episode:  1538 | score: 58.0 | score max : 105.0 | score avg: 57.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.43 | avg loss : 0.01
episode:  1539 | score: 28.0 | score max : 105.0 | score avg: 54.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.43 | avg loss : 0.01
episode:  1540 | score: 62.0 | score max : 105.0 | score avg: 55.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.43 | avg loss : 0.01
episode:  154

episode:  1593 | score: 59.0 | score max : 105.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.45 | avg loss : 0.01
episode:  1594 | score: 24.0 | score max : 105.0 | score avg: 48.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.45 | avg loss : 0.01
episode:  1595 | score: 52.0 | score max : 105.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.45 | avg loss : 0.00
episode:  1596 | score: 49.0 | score max : 105.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.45 | avg loss : 0.01
episode:  1597 | score: 52.0 | score max : 105.0 | score avg: 49.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.45 | avg loss : 0.01
episode:  1598 | score: 40.0 | score max : 105.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.45 | avg loss : 0.01
episode:  1599 | score: 59.0 | score max : 105.0 | score avg: 49.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.45 | avg loss : 0.01
episode:  160

episode:  1652 | score: 46.0 | score max : 105.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.48 | avg loss : 0.01
episode:  1653 | score: 69.0 | score max : 105.0 | score avg: 51.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.48 | avg loss : 0.01
episode:  1654 | score: 41.0 | score max : 105.0 | score avg: 50.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.48 | avg loss : 0.01
episode:  1655 | score: 65.0 | score max : 105.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.48 | avg loss : 0.01
episode:  1656 | score: 41.0 | score max : 105.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.48 | avg loss : 0.01
episode:  1657 | score: 44.0 | score max : 105.0 | score avg: 50.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.48 | avg loss : 0.01
episode:  1658 | score: 58.0 | score max : 105.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.48 | avg loss : 0.01
episode:  165

episode:  1711 | score: 50.0 | score max : 105.0 | score avg: 48.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.51 | avg loss : 0.01
episode:  1712 | score: 32.0 | score max : 105.0 | score avg: 46.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.51 | avg loss : 0.01
episode:  1713 | score: 31.0 | score max : 105.0 | score avg: 44.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.52 | avg loss : 0.01
episode:  1714 | score: 45.0 | score max : 105.0 | score avg: 44.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.51 | avg loss : 0.01
episode:  1715 | score: 38.0 | score max : 105.0 | score avg: 44.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.52 | avg loss : 0.01
episode:  1716 | score: 50.0 | score max : 105.0 | score avg: 44.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.51 | avg loss : 0.01
episode:  1717 | score: 49.0 | score max : 105.0 | score avg: 45.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.52 | avg loss : 0.01
episode:  171

episode:  1770 | score: 52.0 | score max : 105.0 | score avg: 52.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.55 | avg loss : 0.01
episode:  1771 | score: 52.0 | score max : 105.0 | score avg: 52.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.55 | avg loss : 0.01
episode:  1772 | score: 66.0 | score max : 105.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.55 | avg loss : 0.01
episode:  1773 | score: 72.0 | score max : 105.0 | score avg: 55.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.55 | avg loss : 0.01
episode:  1774 | score: 52.0 | score max : 105.0 | score avg: 55.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.55 | avg loss : 0.01
episode:  1775 | score: 44.0 | score max : 105.0 | score avg: 54.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.55 | avg loss : 0.01
episode:  1776 | score: 60.0 | score max : 105.0 | score avg: 54.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.55 | avg loss : 0.01
episode:  177

episode:  1829 | score: 43.0 | score max : 105.0 | score avg: 50.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.01
episode:  1830 | score: 51.0 | score max : 105.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.01
episode:  1831 | score: 43.0 | score max : 105.0 | score avg: 49.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.01
episode:  1832 | score: 51.0 | score max : 105.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.01
episode:  1833 | score: 59.0 | score max : 105.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.01
episode:  1834 | score: 21.0 | score max : 105.0 | score avg: 47.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.01
episode:  1835 | score: 42.0 | score max : 105.0 | score avg: 47.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.01
episode:  183

episode:  1888 | score: 25.0 | score max : 105.0 | score avg: 47.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.58 | avg loss : 0.01
episode:  1889 | score: 49.0 | score max : 105.0 | score avg: 48.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.58 | avg loss : 0.01
episode:  1890 | score: 80.0 | score max : 105.0 | score avg: 51.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.58 | avg loss : 0.01
episode:  1891 | score: 48.0 | score max : 105.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.58 | avg loss : 0.01
episode:  1892 | score: 64.0 | score max : 105.0 | score avg: 52.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.58 | avg loss : 0.01
episode:  1893 | score: 49.0 | score max : 105.0 | score avg: 51.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.58 | avg loss : 0.01
episode:  1894 | score: 53.0 | score max : 105.0 | score avg: 52.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.57 | avg loss : 0.00
episode:  189

episode:  1947 | score: 46.0 | score max : 105.0 | score avg: 44.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.01
episode:  1948 | score: 58.0 | score max : 105.0 | score avg: 45.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.62 | avg loss : 0.01
episode:  1949 | score: 40.0 | score max : 105.0 | score avg: 45.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.62 | avg loss : 0.01
episode:  1950 | score: 71.0 | score max : 105.0 | score avg: 47.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.62 | avg loss : 0.01
episode:  1951 | score: 71.0 | score max : 105.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.62 | avg loss : 0.01
episode:  1952 | score: 73.0 | score max : 105.0 | score avg: 52.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.62 | avg loss : 0.01
episode:  1953 | score: 78.0 | score max : 105.0 | score avg: 54.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.62 | avg loss : 0.01
episode:  195

episode:  2006 | score: 60.0 | score max : 105.0 | score avg: 52.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.01
episode:  2007 | score: 53.0 | score max : 105.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.01
episode:  2008 | score: 70.0 | score max : 105.0 | score avg: 54.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.01
episode:  2009 | score: 29.0 | score max : 105.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.01
episode:  2010 | score: 45.0 | score max : 105.0 | score avg: 51.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.00
episode:  2011 | score: 56.0 | score max : 105.0 | score avg: 51.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.01
episode:  2012 | score: 49.0 | score max : 105.0 | score avg: 51.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.61 | avg loss : 0.01
episode:  201

episode:  2065 | score: 50.0 | score max : 105.0 | score avg: 50.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.63 | avg loss : 0.01
episode:  2066 | score: 56.0 | score max : 105.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.63 | avg loss : 0.01
episode:  2067 | score: 36.0 | score max : 105.0 | score avg: 49.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.63 | avg loss : 0.01
episode:  2068 | score: 57.0 | score max : 105.0 | score avg: 50.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.63 | avg loss : 0.01
episode:  2069 | score: 88.0 | score max : 105.0 | score avg: 54.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.63 | avg loss : 0.01
episode:  2070 | score: 51.0 | score max : 105.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2071 | score: 65.0 | score max : 105.0 | score avg: 54.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  207

episode:  2124 | score: 79.0 | score max : 105.0 | score avg: 56.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2125 | score: 33.0 | score max : 105.0 | score avg: 54.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2126 | score: 49.0 | score max : 105.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2127 | score: 34.0 | score max : 105.0 | score avg: 51.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2128 | score: 31.0 | score max : 105.0 | score avg: 49.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2129 | score: 61.0 | score max : 105.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2130 | score: 54.0 | score max : 105.0 | score avg: 51.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  213

episode:  2183 | score: 45.0 | score max : 105.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2184 | score: 68.0 | score max : 105.0 | score avg: 53.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2185 | score: 56.0 | score max : 105.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2186 | score: 49.0 | score max : 105.0 | score avg: 53.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2187 | score: 56.0 | score max : 105.0 | score avg: 53.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2188 | score: 63.0 | score max : 105.0 | score avg: 54.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.64 | avg loss : 0.01
episode:  2189 | score: 53.0 | score max : 105.0 | score avg: 54.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.65 | avg loss : 0.01
episode:  219

episode:  2242 | score: 64.0 | score max : 105.0 | score avg: 60.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.66 | avg loss : 0.01
episode:  2243 | score: 33.0 | score max : 105.0 | score avg: 57.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.66 | avg loss : 0.01
episode:  2244 | score: 49.0 | score max : 105.0 | score avg: 56.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.65 | avg loss : 0.01
episode:  2245 | score: 66.0 | score max : 105.0 | score avg: 57.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.65 | avg loss : 0.01
episode:  2246 | score: 35.0 | score max : 105.0 | score avg: 55.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.65 | avg loss : 0.01
episode:  2247 | score: 54.0 | score max : 105.0 | score avg: 55.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.65 | avg loss : 0.01
episode:  2248 | score: 42.0 | score max : 105.0 | score avg: 53.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.65 | avg loss : 0.01
episode:  224

episode:  2301 | score: 50.0 | score max : 105.0 | score avg: 49.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.66 | avg loss : 0.01
episode:  2302 | score: 31.0 | score max : 105.0 | score avg: 47.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.66 | avg loss : 0.01
episode:  2303 | score: 54.0 | score max : 105.0 | score avg: 48.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.67 | avg loss : 0.01
episode:  2304 | score: 44.0 | score max : 105.0 | score avg: 47.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.67 | avg loss : 0.01
episode:  2305 | score: 62.0 | score max : 105.0 | score avg: 49.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.66 | avg loss : 0.01
episode:  2306 | score: 30.0 | score max : 105.0 | score avg: 47.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.67 | avg loss : 0.01
episode:  2307 | score: 33.0 | score max : 105.0 | score avg: 46.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.66 | avg loss : 0.00
episode:  230

episode:  2360 | score: 48.0 | score max : 113.0 | score avg: 57.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.68 | avg loss : 0.01
episode:  2361 | score: 63.0 | score max : 113.0 | score avg: 58.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.68 | avg loss : 0.01
episode:  2362 | score: 50.0 | score max : 113.0 | score avg: 57.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.68 | avg loss : 0.01
episode:  2363 | score: 53.0 | score max : 113.0 | score avg: 56.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.68 | avg loss : 0.01
episode:  2364 | score: 74.0 | score max : 113.0 | score avg: 58.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.68 | avg loss : 0.01
episode:  2365 | score: 38.0 | score max : 113.0 | score avg: 56.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.68 | avg loss : 0.01
episode:  2366 | score: 59.0 | score max : 113.0 | score avg: 56.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.68 | avg loss : 0.01
episode:  236

episode:  2419 | score: 38.0 | score max : 113.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2420 | score: 63.0 | score max : 113.0 | score avg: 54.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2421 | score: 57.0 | score max : 113.0 | score avg: 54.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2422 | score: 53.0 | score max : 113.0 | score avg: 54.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2423 | score: 37.0 | score max : 113.0 | score avg: 52.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2424 | score: 51.0 | score max : 113.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2425 | score: 58.0 | score max : 113.0 | score avg: 53.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  242

episode:  2478 | score: 66.0 | score max : 113.0 | score avg: 49.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2479 | score: 45.0 | score max : 113.0 | score avg: 49.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.00
episode:  2480 | score: 52.0 | score max : 113.0 | score avg: 49.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  2481 | score: 48.0 | score max : 113.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.00
episode:  2482 | score: 60.0 | score max : 113.0 | score avg: 50.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.00
episode:  2483 | score: 36.0 | score max : 113.0 | score avg: 49.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.00
episode:  2484 | score: 59.0 | score max : 113.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.00
episode:  248

episode:  2537 | score: 56.0 | score max : 113.0 | score avg: 55.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2538 | score: 62.0 | score max : 113.0 | score avg: 56.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  2539 | score: 40.0 | score max : 113.0 | score avg: 54.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  2540 | score: 43.0 | score max : 113.0 | score avg: 53.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  2541 | score: 62.0 | score max : 113.0 | score avg: 54.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  2542 | score: 80.0 | score max : 113.0 | score avg: 56.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2543 | score: 46.0 | score max : 113.0 | score avg: 55.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  254

episode:  2596 | score: 71.0 | score max : 113.0 | score avg: 47.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2597 | score: 42.0 | score max : 113.0 | score avg: 46.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2598 | score: 59.0 | score max : 113.0 | score avg: 48.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2599 | score: 30.0 | score max : 113.0 | score avg: 46.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2600 | score: 45.0 | score max : 113.0 | score avg: 46.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2601 | score: 62.0 | score max : 113.0 | score avg: 47.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2602 | score: 71.0 | score max : 113.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  260

episode:  2655 | score: 38.0 | score max : 113.0 | score avg: 49.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2656 | score: 34.0 | score max : 113.0 | score avg: 47.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  2657 | score: 43.0 | score max : 113.0 | score avg: 47.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2658 | score: 55.0 | score max : 113.0 | score avg: 47.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  2659 | score: 70.0 | score max : 113.0 | score avg: 50.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.72 | avg loss : 0.01
episode:  2660 | score: 57.0 | score max : 113.0 | score avg: 50.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.00
episode:  2661 | score: 63.0 | score max : 113.0 | score avg: 52.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.71 | avg loss : 0.01
episode:  266

episode:  2714 | score: 28.0 | score max : 113.0 | score avg: 54.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.74 | avg loss : 0.01
episode:  2715 | score: 58.0 | score max : 113.0 | score avg: 54.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.74 | avg loss : 0.00
episode:  2716 | score: 51.0 | score max : 113.0 | score avg: 54.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.74 | avg loss : 0.01
episode:  2717 | score: 77.0 | score max : 113.0 | score avg: 56.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.74 | avg loss : 0.01
episode:  2718 | score: 61.0 | score max : 113.0 | score avg: 56.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.74 | avg loss : 0.01
episode:  2719 | score: 62.0 | score max : 113.0 | score avg: 57.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.74 | avg loss : 0.01
episode:  2720 | score: 32.0 | score max : 113.0 | score avg: 54.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.74 | avg loss : 0.01
episode:  272

episode:  2773 | score: 36.0 | score max : 113.0 | score avg: 54.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2774 | score: 73.0 | score max : 113.0 | score avg: 56.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2775 | score: 45.0 | score max : 113.0 | score avg: 55.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2776 | score: 51.0 | score max : 113.0 | score avg: 54.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2777 | score: 44.0 | score max : 113.0 | score avg: 53.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2778 | score: 48.0 | score max : 113.0 | score avg: 53.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2779 | score: 67.0 | score max : 113.0 | score avg: 54.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  278

episode:  2832 | score: 52.0 | score max : 113.0 | score avg: 48.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2833 | score: 75.0 | score max : 113.0 | score avg: 51.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2834 | score: 51.0 | score max : 113.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2835 | score: 75.0 | score max : 113.0 | score avg: 53.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2836 | score: 43.0 | score max : 113.0 | score avg: 52.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2837 | score: 46.0 | score max : 113.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2838 | score: 39.0 | score max : 113.0 | score avg: 50.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  283

episode:  2891 | score: 68.0 | score max : 113.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2892 | score: 41.0 | score max : 113.0 | score avg: 51.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2893 | score: 37.0 | score max : 113.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2894 | score: 55.0 | score max : 113.0 | score avg: 50.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2895 | score: 101.0 | score max : 113.0 | score avg: 55.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2896 | score: 54.0 | score max : 113.0 | score avg: 55.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2897 | score: 54.0 | score max : 113.0 | score avg: 55.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  28

episode:  2950 | score: 69.0 | score max : 113.0 | score avg: 56.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2951 | score: 44.0 | score max : 113.0 | score avg: 55.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2952 | score: 46.0 | score max : 113.0 | score avg: 54.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2953 | score: 71.0 | score max : 113.0 | score avg: 55.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2954 | score: 34.0 | score max : 113.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2955 | score: 41.0 | score max : 113.0 | score avg: 52.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  2956 | score: 95.0 | score max : 113.0 | score avg: 56.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.75 | avg loss : 0.01
episode:  295

episode:  3009 | score: 53.0 | score max : 113.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  3010 | score: 28.0 | score max : 113.0 | score avg: 46.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  3011 | score: 61.0 | score max : 113.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  3012 | score: 40.0 | score max : 113.0 | score avg: 47.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  3013 | score: 87.0 | score max : 113.0 | score avg: 51.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  3014 | score: 40.0 | score max : 113.0 | score avg: 50.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.00
episode:  3015 | score: 70.0 | score max : 113.0 | score avg: 52.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  301

episode:  3068 | score: 55.0 | score max : 113.0 | score avg: 54.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.77 | avg loss : 0.01
episode:  3069 | score: 29.0 | score max : 113.0 | score avg: 51.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.77 | avg loss : 0.01
episode:  3070 | score: 73.0 | score max : 113.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.77 | avg loss : 0.01
episode:  3071 | score: 38.0 | score max : 113.0 | score avg: 52.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  3072 | score: 57.0 | score max : 113.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.77 | avg loss : 0.01
episode:  3073 | score: 29.0 | score max : 113.0 | score avg: 50.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.76 | avg loss : 0.01
episode:  3074 | score: 41.0 | score max : 113.0 | score avg: 49.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.77 | avg loss : 0.01
episode:  307

episode:  3127 | score: 44.0 | score max : 113.0 | score avg: 49.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3128 | score: 46.0 | score max : 113.0 | score avg: 49.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3129 | score: 64.0 | score max : 113.0 | score avg: 50.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3130 | score: 71.0 | score max : 113.0 | score avg: 52.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3131 | score: 43.0 | score max : 113.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3132 | score: 56.0 | score max : 113.0 | score avg: 52.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3133 | score: 36.0 | score max : 113.0 | score avg: 50.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  313

episode:  3186 | score: 37.0 | score max : 113.0 | score avg: 51.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.00
episode:  3187 | score: 69.0 | score max : 113.0 | score avg: 53.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.00
episode:  3188 | score: 47.0 | score max : 113.0 | score avg: 52.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3189 | score: 52.0 | score max : 113.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3190 | score: 58.0 | score max : 113.0 | score avg: 53.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3191 | score: 57.0 | score max : 113.0 | score avg: 53.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3192 | score: 45.0 | score max : 113.0 | score avg: 52.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  319

episode:  3245 | score: 51.0 | score max : 113.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3246 | score: 45.0 | score max : 113.0 | score avg: 50.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3247 | score: 57.0 | score max : 113.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3248 | score: 61.0 | score max : 113.0 | score avg: 52.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3249 | score: 88.0 | score max : 113.0 | score avg: 55.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3250 | score: 55.0 | score max : 113.0 | score avg: 55.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3251 | score: 74.0 | score max : 113.0 | score avg: 57.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  325

episode:  3304 | score: 63.0 | score max : 113.0 | score avg: 52.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3305 | score: 45.0 | score max : 113.0 | score avg: 51.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3306 | score: 70.0 | score max : 113.0 | score avg: 53.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3307 | score: 56.0 | score max : 113.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3308 | score: 44.0 | score max : 113.0 | score avg: 52.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.00
episode:  3309 | score: 62.0 | score max : 113.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.78 | avg loss : 0.01
episode:  3310 | score: 58.0 | score max : 113.0 | score avg: 54.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  331

episode:  3363 | score: 59.0 | score max : 113.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3364 | score: 86.0 | score max : 113.0 | score avg: 54.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3365 | score: 63.0 | score max : 113.0 | score avg: 55.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3366 | score: 80.0 | score max : 113.0 | score avg: 57.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3367 | score: 61.0 | score max : 113.0 | score avg: 57.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3368 | score: 33.0 | score max : 113.0 | score avg: 55.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  3369 | score: 45.0 | score max : 113.0 | score avg: 54.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.79 | avg loss : 0.01
episode:  337

episode:  3422 | score: 50.0 | score max : 113.0 | score avg: 48.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3423 | score: 102.0 | score max : 113.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3424 | score: 50.0 | score max : 113.0 | score avg: 53.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3425 | score: 64.0 | score max : 113.0 | score avg: 54.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3426 | score: 60.0 | score max : 113.0 | score avg: 55.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.00
episode:  3427 | score: 79.0 | score max : 113.0 | score avg: 57.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.80 | avg loss : 0.01
episode:  3428 | score: 47.0 | score max : 113.0 | score avg: 56.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.80 | avg loss : 0.01
episode:  34

episode:  3481 | score: 45.0 | score max : 113.0 | score avg: 46.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.82 | avg loss : 0.01
episode:  3482 | score: 72.0 | score max : 113.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.82 | avg loss : 0.01
episode:  3483 | score: 55.0 | score max : 113.0 | score avg: 49.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3484 | score: 76.0 | score max : 113.0 | score avg: 52.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3485 | score: 59.0 | score max : 113.0 | score avg: 52.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3486 | score: 52.0 | score max : 113.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  3487 | score: 52.0 | score max : 113.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.81 | avg loss : 0.01
episode:  348

episode:  3540 | score: 65.0 | score max : 113.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3541 | score: 41.0 | score max : 113.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3542 | score: 37.0 | score max : 113.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3543 | score: 43.0 | score max : 113.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3544 | score: 49.0 | score max : 113.0 | score avg: 48.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3545 | score: 89.0 | score max : 113.0 | score avg: 52.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3546 | score: 49.0 | score max : 113.0 | score avg: 52.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  354

episode:  3599 | score: 46.0 | score max : 113.0 | score avg: 50.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3600 | score: 68.0 | score max : 113.0 | score avg: 52.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.00
episode:  3601 | score: 74.0 | score max : 113.0 | score avg: 54.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3602 | score: 53.0 | score max : 113.0 | score avg: 54.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.00
episode:  3603 | score: 54.0 | score max : 113.0 | score avg: 54.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3604 | score: 38.0 | score max : 113.0 | score avg: 52.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3605 | score: 42.0 | score max : 113.0 | score avg: 51.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  360

episode:  3658 | score: 30.0 | score max : 113.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3659 | score: 72.0 | score max : 113.0 | score avg: 53.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3660 | score: 44.0 | score max : 113.0 | score avg: 52.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3661 | score: 29.0 | score max : 113.0 | score avg: 50.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.82 | avg loss : 0.00
episode:  3662 | score: 27.0 | score max : 113.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  3663 | score: 39.0 | score max : 113.0 | score avg: 47.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.82 | avg loss : 0.01
episode:  3664 | score: 76.0 | score max : 113.0 | score avg: 50.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.83 | avg loss : 0.01
episode:  366

episode:  3717 | score: 42.0 | score max : 113.0 | score avg: 47.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.85 | avg loss : 0.01
episode:  3718 | score: 29.0 | score max : 113.0 | score avg: 45.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.85 | avg loss : 0.01
episode:  3719 | score: 53.0 | score max : 113.0 | score avg: 46.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.85 | avg loss : 0.01
episode:  3720 | score: 52.0 | score max : 113.0 | score avg: 46.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.85 | avg loss : 0.01
episode:  3721 | score: 49.0 | score max : 113.0 | score avg: 46.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.84 | avg loss : 0.01
episode:  3722 | score: 53.0 | score max : 113.0 | score avg: 47.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.85 | avg loss : 0.01
episode:  3723 | score: 59.0 | score max : 113.0 | score avg: 48.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.85 | avg loss : 0.01
episode:  372

episode:  3776 | score: 42.0 | score max : 113.0 | score avg: 49.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3777 | score: 48.0 | score max : 113.0 | score avg: 49.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3778 | score: 44.0 | score max : 113.0 | score avg: 48.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3779 | score: 60.0 | score max : 113.0 | score avg: 49.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.00
episode:  3780 | score: 44.0 | score max : 113.0 | score avg: 49.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3781 | score: 61.0 | score max : 113.0 | score avg: 50.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3782 | score: 71.0 | score max : 113.0 | score avg: 52.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.00
episode:  378

episode:  3835 | score: 80.0 | score max : 113.0 | score avg: 52.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  3836 | score: 64.0 | score max : 113.0 | score avg: 53.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  3837 | score: 35.0 | score max : 113.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  3838 | score: 52.0 | score max : 113.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  3839 | score: 45.0 | score max : 113.0 | score avg: 51.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  3840 | score: 34.0 | score max : 113.0 | score avg: 49.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  3841 | score: 60.0 | score max : 113.0 | score avg: 50.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  384

episode:  3894 | score: 64.0 | score max : 113.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  3895 | score: 31.0 | score max : 113.0 | score avg: 49.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  3896 | score: 48.0 | score max : 113.0 | score avg: 48.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  3897 | score: 45.0 | score max : 113.0 | score avg: 48.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  3898 | score: 62.0 | score max : 113.0 | score avg: 49.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.00
episode:  3899 | score: 91.0 | score max : 113.0 | score avg: 54.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  3900 | score: 38.0 | score max : 113.0 | score avg: 52.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  390

episode:  3953 | score: 60.0 | score max : 113.0 | score avg: 59.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3954 | score: 57.0 | score max : 113.0 | score avg: 59.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3955 | score: 41.0 | score max : 113.0 | score avg: 57.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3956 | score: 58.0 | score max : 113.0 | score avg: 57.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3957 | score: 51.0 | score max : 113.0 | score avg: 57.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3958 | score: 65.0 | score max : 113.0 | score avg: 57.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  3959 | score: 67.0 | score max : 113.0 | score avg: 58.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  396

episode:  4012 | score: 43.0 | score max : 113.0 | score avg: 47.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4013 | score: 57.0 | score max : 113.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4014 | score: 46.0 | score max : 113.0 | score avg: 47.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4015 | score: 53.0 | score max : 113.0 | score avg: 48.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.00
episode:  4016 | score: 46.0 | score max : 113.0 | score avg: 48.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4017 | score: 31.0 | score max : 113.0 | score avg: 46.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4018 | score: 46.0 | score max : 113.0 | score avg: 46.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  401

episode:  4071 | score: 47.0 | score max : 113.0 | score avg: 52.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4072 | score: 46.0 | score max : 113.0 | score avg: 51.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4073 | score: 25.0 | score max : 113.0 | score avg: 48.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4074 | score: 42.0 | score max : 113.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4075 | score: 48.0 | score max : 113.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4076 | score: 80.0 | score max : 113.0 | score avg: 51.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4077 | score: 67.0 | score max : 113.0 | score avg: 52.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  407

episode:  4130 | score: 65.0 | score max : 113.0 | score avg: 54.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4131 | score: 58.0 | score max : 113.0 | score avg: 54.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.00
episode:  4132 | score: 36.0 | score max : 113.0 | score avg: 52.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4133 | score: 47.0 | score max : 113.0 | score avg: 52.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4134 | score: 45.0 | score max : 113.0 | score avg: 51.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4135 | score: 82.0 | score max : 113.0 | score avg: 54.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4136 | score: 41.0 | score max : 113.0 | score avg: 53.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  413

episode:  4189 | score: 40.0 | score max : 113.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4190 | score: 50.0 | score max : 113.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4191 | score: 46.0 | score max : 113.0 | score avg: 49.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4192 | score: 37.0 | score max : 113.0 | score avg: 48.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4193 | score: 53.0 | score max : 113.0 | score avg: 48.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4194 | score: 42.0 | score max : 113.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4195 | score: 43.0 | score max : 113.0 | score avg: 47.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  419

episode:  4248 | score: 39.0 | score max : 113.0 | score avg: 47.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.86 | avg loss : 0.00
episode:  4249 | score: 48.0 | score max : 113.0 | score avg: 47.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.86 | avg loss : 0.00
episode:  4250 | score: 53.0 | score max : 113.0 | score avg: 48.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4251 | score: 47.0 | score max : 113.0 | score avg: 48.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4252 | score: 59.0 | score max : 113.0 | score avg: 49.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4253 | score: 49.0 | score max : 113.0 | score avg: 49.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.86 | avg loss : 0.01
episode:  4254 | score: 64.0 | score max : 113.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.86 | avg loss : 0.01
episode:  425

episode:  4307 | score: 78.0 | score max : 113.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4308 | score: 57.0 | score max : 113.0 | score avg: 51.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.86 | avg loss : 0.01
episode:  4309 | score: 74.0 | score max : 113.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4310 | score: 45.0 | score max : 113.0 | score avg: 52.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.86 | avg loss : 0.01
episode:  4311 | score: 80.0 | score max : 113.0 | score avg: 55.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.87 | avg loss : 0.01
episode:  4312 | score: 57.0 | score max : 113.0 | score avg: 55.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  4313 | score: 51.0 | score max : 113.0 | score avg: 55.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.01
episode:  431

episode:  4366 | score: 25.0 | score max : 113.0 | score avg: 54.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4367 | score: 56.0 | score max : 113.0 | score avg: 54.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4368 | score: 73.0 | score max : 113.0 | score avg: 56.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4369 | score: 24.0 | score max : 113.0 | score avg: 53.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4370 | score: 58.0 | score max : 113.0 | score avg: 53.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4371 | score: 43.0 | score max : 113.0 | score avg: 52.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4372 | score: 58.0 | score max : 113.0 | score avg: 53.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  437

episode:  4425 | score: 79.0 | score max : 113.0 | score avg: 49.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.00
episode:  4426 | score: 36.0 | score max : 113.0 | score avg: 48.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4427 | score: 40.0 | score max : 113.0 | score avg: 47.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.00
episode:  4428 | score: 14.0 | score max : 113.0 | score avg: 44.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4429 | score: 52.0 | score max : 113.0 | score avg: 44.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4430 | score: 53.0 | score max : 113.0 | score avg: 45.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4431 | score: 31.0 | score max : 113.0 | score avg: 44.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  443

episode:  4484 | score: 59.0 | score max : 113.0 | score avg: 52.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4485 | score: 34.0 | score max : 113.0 | score avg: 50.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4486 | score: 53.0 | score max : 113.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.00
episode:  4487 | score: 43.0 | score max : 113.0 | score avg: 50.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4488 | score: 53.0 | score max : 113.0 | score avg: 50.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4489 | score: 62.0 | score max : 113.0 | score avg: 51.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4490 | score: 42.0 | score max : 113.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  449

episode:  4543 | score: 32.0 | score max : 113.0 | score avg: 51.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4544 | score: 70.0 | score max : 113.0 | score avg: 53.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4545 | score: 24.0 | score max : 113.0 | score avg: 50.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4546 | score: 57.0 | score max : 113.0 | score avg: 50.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4547 | score: 51.0 | score max : 113.0 | score avg: 50.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4548 | score: 25.0 | score max : 113.0 | score avg: 48.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.93 | avg loss : 0.01
episode:  4549 | score: 50.0 | score max : 113.0 | score avg: 48.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.93 | avg loss : 0.01
episode:  455

episode:  4602 | score: 40.0 | score max : 113.0 | score avg: 54.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4603 | score: 55.0 | score max : 113.0 | score avg: 54.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.92 | avg loss : 0.01
episode:  4604 | score: 46.0 | score max : 113.0 | score avg: 53.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.91 | avg loss : 0.01
episode:  4605 | score: 76.0 | score max : 113.0 | score avg: 55.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.91 | avg loss : 0.01
episode:  4606 | score: 44.0 | score max : 113.0 | score avg: 54.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.91 | avg loss : 0.01
episode:  4607 | score: 42.0 | score max : 113.0 | score avg: 53.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.91 | avg loss : 0.01
episode:  4608 | score: 59.0 | score max : 113.0 | score avg: 53.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.91 | avg loss : 0.01
episode:  460

episode:  4661 | score: 59.0 | score max : 113.0 | score avg: 50.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4662 | score: 51.0 | score max : 113.0 | score avg: 50.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4663 | score: 54.0 | score max : 113.0 | score avg: 50.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4664 | score: 55.0 | score max : 113.0 | score avg: 51.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4665 | score: 39.0 | score max : 113.0 | score avg: 49.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4666 | score: 66.0 | score max : 113.0 | score avg: 51.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4667 | score: 61.0 | score max : 113.0 | score avg: 52.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  466

episode:  4720 | score: 27.0 | score max : 113.0 | score avg: 51.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.00
episode:  4721 | score: 55.0 | score max : 113.0 | score avg: 51.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4722 | score: 46.0 | score max : 113.0 | score avg: 51.2 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4723 | score: 29.0 | score max : 113.0 | score avg: 48.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4724 | score: 38.0 | score max : 113.0 | score avg: 47.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4725 | score: 77.0 | score max : 113.0 | score avg: 50.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4726 | score: 61.0 | score max : 113.0 | score avg: 51.8 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  472

episode:  4779 | score: 53.0 | score max : 113.0 | score avg: 49.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4780 | score: 62.0 | score max : 113.0 | score avg: 50.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4781 | score: 60.0 | score max : 113.0 | score avg: 51.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4782 | score: 52.0 | score max : 113.0 | score avg: 51.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4783 | score: 69.0 | score max : 113.0 | score avg: 53.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4784 | score: 34.0 | score max : 113.0 | score avg: 51.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4785 | score: 46.0 | score max : 113.0 | score avg: 50.9 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  478

episode:  4838 | score: 33.0 | score max : 113.0 | score avg: 57.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.00
episode:  4839 | score: 74.0 | score max : 113.0 | score avg: 59.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4840 | score: 43.0 | score max : 113.0 | score avg: 57.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4841 | score: 48.0 | score max : 113.0 | score avg: 56.6 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4842 | score: 48.0 | score max : 113.0 | score avg: 55.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4843 | score: 96.0 | score max : 113.0 | score avg: 59.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.00
episode:  4844 | score: 56.0 | score max : 113.0 | score avg: 59.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  484

episode:  4897 | score: 58.0 | score max : 113.0 | score avg: 52.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4898 | score: 45.0 | score max : 113.0 | score avg: 51.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4899 | score: 63.0 | score max : 113.0 | score avg: 52.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.00
episode:  4900 | score: 42.0 | score max : 113.0 | score avg: 51.4 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.00
episode:  4901 | score: 40.0 | score max : 113.0 | score avg: 50.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4902 | score: 47.0 | score max : 113.0 | score avg: 50.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.89 | avg loss : 0.01
episode:  4903 | score: 43.0 | score max : 113.0 | score avg: 49.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.88 | avg loss : 0.00
episode:  490

episode:  4956 | score: 59.0 | score max : 113.0 | score avg: 51.3 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.00
episode:  4957 | score: 29.0 | score max : 113.0 | score avg: 49.1 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.00
episode:  4958 | score: 35.0 | score max : 113.0 | score avg: 47.7 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4959 | score: 76.0 | score max : 113.0 | score avg: 50.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4960 | score: 71.0 | score max : 113.0 | score avg: 52.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4961 | score: 72.0 | score max : 113.0 | score avg: 54.5 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  4962 | score: 60.0 | score max : 113.0 | score avg: 55.0 | memory length: 100000 | epsilon: 0.100 | q avg : 1.90 | avg loss : 0.01
episode:  496

KeyboardInterrupt: 

In [None]:
# env = gym_tetris.make('TetrisA-v0')
# env = JoypadSpace(env, MOVEMENT)
# state, reward, done, info = env.step(env.action_space.sample())

In [None]:
### info