### Cab-Driver Agent

In [1]:
# Importing libraries
import numpy as np
import random
import math
from collections import deque
import collections
import pickle
import os
from itertools import permutations,product
# for building DQN model
from keras import layers
from keras import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
import keras.engine.saving as save_model

# for plotting graphs
import matplotlib.pyplot as plt
import pylab

# Import the environment
from Env import CabDriver


Using TensorFlow backend.


#### Defining Time Matrix

In [2]:
# Loading the time matrix provided
Time_matrix = np.load("TM.npy")

#### Tracking the state-action pairs for checking convergence


In [3]:
#Defining a function to save the Q-dictionary as a pickle file
def save_pickle(obj, name ):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

In [4]:
#define epsilon_decay strategy
def epsilon_decay(total_steps, step):
    epsilon = abs(np.exp((-np.pi/total_steps)*step))
    return epsilon

### Agent Class

If you are using this framework, you need to fill the following to complete the following code block:
1. State and Action Size
2. Hyperparameters
3. Create a neural-network model in function 'build_model()'
4. Define epsilon-greedy strategy in function 'get_action()'
5. Complete the function 'append_sample()'. This function appends the recent experience tuple <state, action, reward, new-state> to the memory
6. Complete the 'train_model()' function with following logic:
   - If the memory size is greater than mini-batch size, you randomly sample experiences from memory as per the mini-batch size and do the following:
      - Initialise your input and output batch for training the model
      - Calculate the target Q value for each sample: reward + gamma*max(Q(s'a,))
      - Get Q(s', a) values from the last trained model
      - Update the input batch as your encoded state and output batch as your Q-values
      - Then fit your DQN model using the updated input and output batch.

In [None]:
#this cell contains the agent class

class DQNAgent:
    def __init__(self, state_size, action_size, action_map, discount_factor=0.95, learning_rate=0.01,
                       epsilon=0.99, epsilon_decay=0.99, epsilon_min=0.01):
        # Define size of state and action
        self.state_size = state_size
        self.action_size = action_size

        # Write here: Specify you hyper parameters for the DQN
        self.discount_factor = discount_factor
        self.learning_rate = learning_rate        
        self.epsilon_max = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.model_history = None
        self.action_map = action_map
        self.batch_size = 32
        #self.batch_size = 1
        # create replay memory using deque
        self.memory = deque(maxlen=4096)

        # create main model and target model
        self.model = self.build_model()
    

    # approximate Q function using Neural Network
    def build_model(self):
        model = Sequential()
        # Write your code here: Add layers to your neural nets       

        # hidden layers
        model.add(Dense(32, input_dim=self.state_size, activation='relu', kernel_initializer='he_uniform'))
        model.add(Dense(32, activation='relu', kernel_initializer='he_uniform'))
        #model.add(Dense(32, activation='relu', kernel_initializer='he_uniform'))

        # the output layer: output is of size num_actions
        model.add(Dense(self.action_size, activation='relu', kernel_initializer='he_uniform'))     
        model.compile(loss='mse',optimizer=Adam(lr=self.learning_rate))
        model.summary()
        return model


    def get_action(self, cstate, all_actions, pos_act_ind):
    # Write your code here:
    # get action from model using epsilon-greedy policy
    # Decay in ε after we generate each sample from the environment
        actions = all_actions[pos_act_ind]
        if np.random.rand() <= self.epsilon_max:
            # explore: choose a random action from all possible actions
            mode = 'Exploring'
            action = random.choice(actions)
        else:
            # choose the action with the highest q(s, a)
            # the first index corresponds to the batch size, so
            # reshape state to (1, state_size) so that the first index corresponds to the batch size
            mode = 'Exploiting'
            #cstate = cstate.reshape(1, self.state_size) 
            q_value = self.model.predict(x=cstate)
            Q_val_for_actions = q_value[0][pos_act_ind]
            max_index = np.argmax(Q_val_for_actions)
            print(q_value.shape, Q_val_for_actions, 'Index ', max_index, np.max(Q_val_for_actions))
            action = actions[max_index]
        print(mode, 'Available actions ', actions, ' Selected action ', action)    
        return action
        

    def append_sample(self, state, action, reward, next_state, done):
    # Write your code here:
    # save sample <s,a,r,s'> to the replay memory
        self.memory.append((state, action, reward, next_state, done))
    
    
    # pick samples randomly from replay memory (with batch_size) and train the network
    def train_model(self, states_to_be_tracked):
        if len(self.memory) > self.batch_size:
            # Sample batch from the memory
            mini_batch = random.sample(self.memory, self.batch_size)
            update_input = np.zeros((self.batch_size, self.state_size))
            update_output = np.zeros((self.batch_size, self.state_size))
            
            actions, rewards, done = [], [], []
            
            for i in range(self.batch_size):
                state, action, reward, next_state, done_boolean = mini_batch[i]
                # Write your code from here
                # 1. Identify the next action 
                update_input[i] = env.state_encod_arch1(state)
                actions.append(action)
                rewards.append(reward)
                update_output[i] = env.state_encod_arch1(next_state)
                done.append(done_boolean)
                
            # 2. Get the target for the Q-network
            
            target = self.model.predict(update_input)
            target_qval = self.model.predict(update_output)
            
            #3. Update our target rewards
            for i in range(self.batch_size):
                index = self.action_map[tuple(actions[i])]
                if done[i]:
                    target[i][index] = rewards[i]
                else: # non-terminal state
                    max_qvalue = np.max(target_qval[i])
                    target[i][index] = rewards[i] + self.discount_factor * max_qvalue
                    if states_to_be_tracked and (mini_batch[i][0], tuple(actions[i])) in states_to_be_tracked:
                        states_to_be_tracked[(mini_batch[i][0], tuple(actions[i]))] = target_qval
                        
                
        # 4. Fit your model and track the loss values
            return self.model.fit(update_input, target, batch_size=self.batch_size, epochs=1, verbose=0)
            
            
    def save(self, name):
        self.model.save(name)

In [None]:
# to store rewards in each episode
rewards_per_episode, episodes  = [], []

# make dir to store model weights
if not os.path.exists("saved_model_weights"):
    os.mkdir("saved_model_weights")

# n_episodes
n_episodes = 1000

### DQN block

In [None]:
 # Call all the initialised variables of the environment
env = CabDriver()
#Call the DQN agent
dqn = DQNAgent(env.state_size, env.action_size, env.action_map)
states_to_be_tracked_keys = product(env.state_space[:210], env.action_list[:20])
states_to_be_tracked = {k:[] for k in states_to_be_tracked_keys}

for episode in range(n_episodes):

    # Write code here
    # Call the environment
   
    _,_,curr_state = env.reset()
    state_size = env.state_size
    step = 0
    
   
    #action_size = len(actions)
    reward = 0
    curr_time = 0
    
    
    terminal_state = False
    print("Episode :", episode)
    
    while not terminal_state:
        # Write your code here
        # 1. Pick epsilon-greedy action from possible actions for the current state
        step +=1
        encoded_state = env.state_encod_arch1(curr_state)
        pos_act_ind, actions = env.requests(curr_state)
        action = dqn.get_action(encoded_state, env.action_space, pos_act_ind)
        
        # 2. Evaluate your reward and next state
        reward = reward + env.reward_func(curr_state, action, Time_matrix)
        next_state = env.next_state_func(curr_state,action,Time_matrix)
        
        pickup_loc = action[0]
        drop_loc = action[1]
        current_loc = curr_state[0]
        time = curr_state[1]
        day = curr_state[2]
        #calculate time increase only on different pickup and drop points
        if pickup_loc != drop_loc:
            curr_time = curr_time + Time_matrix[current_loc][pickup_loc][time][day]
            time = next_state[1]
            day = next_state[2]
            curr_time = curr_time + Time_matrix[pickup_loc][drop_loc][time][day]
        else:
            curr_time += 1.0
        
        # 3. Append the experience to the memory
        dqn.append_sample(curr_state, action, reward, next_state, terminal_state)
        curr_state = next_state
        
        # 4. Train the model by calling function agent.train_model
        history = dqn.train_model(states_to_be_tracked)
        # 5. Keep a track of rewards, Q-values, loss
        print("episode:", episode, "  score:", reward, "  memory length:",
                      len(dqn.memory), "  epsilon:", dqn.epsilon_max)
        
        if curr_time >= 24*30:
            terminal_state = True
    
    # store total reward obtained in this episode
    rewards_per_episode.append(reward)
    episodes.append(episode)
    #pylab.plot(episode, rewards_per_episode, 'b')
    #pylab.savefig("./cab_dqn.png")
        
    
    dqn.epsilon_max =  epsilon_decay(n_episodes, episode)
    #save model for every 50 episodes
    if episode % 50 == 0:
            dqn.model.save_weights("./saved_model_weights/driver_dqn.h5")
            save_pickle(save_model.pickle_model(dqn.model), f"saved_pickle_files/driver_dqn_models_{episode}")
            
        
        

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                26912     
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 21)                693       
Total params: 28,661
Trainable params: 28,661
Non-trainable params: 0
_________________________________________________________________
Episode : 0
Exploring Available actions  [[3 2]
 [1 0]
 [1 2]
 [1 3]
 [0 1]
 [1 4]
 [0 0]]  Selected action  [1 2]
episode: 0   score: -31.0   memory length: 1   epsilon: 0.99
Exploring Available actions  [[1 0]
 [0 0]]  Selected action  [0 0]
episode: 0   score: -36.0   memory length: 2   epsilon: 0.99
Exploring Available actions  [[0 4]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 0   score: -4

episode: 0   score: -99.0   memory length: 62   epsilon: 0.99
Exploring Available actions  [[0 3]
 [3 4]
 [3 0]
 [0 2]
 [4 1]
 [0 1]
 [2 0]
 [3 1]
 [0 4]
 [3 2]
 [1 3]
 [0 0]]  Selected action  [3 4]
episode: 0   score: -80.0   memory length: 63   epsilon: 0.99
Exploring Available actions  [[4 1]
 [2 4]
 [0 4]
 [0 0]]  Selected action  [0 0]
episode: 0   score: -85.0   memory length: 64   epsilon: 0.99
Exploring Available actions  [[2 4]
 [3 4]
 [1 0]
 [4 1]
 [3 1]
 [3 0]
 [2 3]
 [1 2]
 [0 0]]  Selected action  [3 4]
episode: 0   score: -97.0   memory length: 65   epsilon: 0.99
Exploring Available actions  [[0 2]
 [1 3]
 [4 1]
 [1 0]
 [3 2]
 [2 1]
 [4 3]
 [1 4]
 [3 4]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 0   score: -102.0   memory length: 66   epsilon: 0.99
Exploring Available actions  [[1 2]
 [2 0]
 [3 4]
 [3 1]
 [3 2]
 [0 4]
 [2 4]
 [0 0]]  Selected action  [0 4]
episode: 0   score: -102.0   memory length: 67   epsilon: 0.99
Exploring Available actions  [[3 2]
 [4 1]
 [0 4

episode: 0   score: -150.0   memory length: 121   epsilon: 0.99
Exploring Available actions  [[3 4]
 [2 0]
 [3 1]
 [2 3]
 [3 2]
 [1 3]
 [2 4]
 [4 1]
 [0 4]
 [1 4]
 [1 0]
 [0 1]
 [1 2]
 [0 3]
 [4 2]
 [0 0]]  Selected action  [0 4]
episode: 0   score: -181.0   memory length: 122   epsilon: 0.99
Exploring Available actions  [[0 2]
 [4 0]
 [0 3]
 [3 2]
 [2 1]
 [4 1]
 [0 1]
 [2 0]
 [0 0]]  Selected action  [0 3]
episode: 0   score: -182.0   memory length: 123   epsilon: 0.99
Exploring Available actions  [[0 1]
 [2 3]
 [4 0]
 [3 0]
 [0 4]
 [4 3]
 [3 2]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [0 4]
episode: 0   score: -171.0   memory length: 124   epsilon: 0.99
Exploring Available actions  [[0 2]
 [0 1]
 [1 4]
 [3 1]
 [0 0]]  Selected action  [1 4]
episode: 0   score: -171.0   memory length: 125   epsilon: 0.99
Exploring Available actions  [[2 4]
 [3 4]
 [4 1]
 [2 1]
 [4 3]
 [0 1]
 [1 4]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [4 3]
episode: 0   score: -163.0   memory length: 126   epsilon: 0

episode: 1   score: 26.0   memory length: 185   epsilon: 1.0
Exploring Available actions  [[4 2]
 [1 4]
 [3 2]
 [4 3]
 [3 1]
 [1 0]
 [2 4]
 [0 3]
 [0 0]]  Selected action  [1 0]
episode: 1   score: 26.0   memory length: 186   epsilon: 1.0
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [2 4]
episode: 1   score: -9.0   memory length: 187   epsilon: 1.0
Exploring Available actions  [[3 4]
 [2 3]
 [4 0]
 [0 2]
 [1 2]
 [4 3]
 [3 0]
 [0 1]
 [0 0]]  Selected action  [0 2]
episode: 1   score: 7.0   memory length: 188   epsilon: 1.0
Exploring Available actions  [[4 1]
 [3 0]
 [0 4]
 [0 0]]  Selected action  [0 4]
episode: 1   score: -9.0   memory length: 189   epsilon: 1.0
Exploring Available actions  [[4 0]
 [1 0]
 [2 3]
 [4 3]
 [2 1]
 [2 4]
 [0 3]
 [0 0]]  Selected action  [4 0]
episode: 1   score: -5.0   memory length: 190   epsilon: 1.0
Exploring Available actions  [[0 1]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [3 0]
episode: 1   score: 4.0   memory length: 191   epsilon: 1.0

episode: 1   score: 20.0   memory length: 239   epsilon: 1.0
Exploring Available actions  [[2 4]
 [1 3]
 [0 2]
 [2 0]
 [3 0]
 [0 3]
 [1 4]
 [4 1]
 [4 2]
 [1 0]
 [0 4]
 [0 0]]  Selected action  [2 0]
episode: 1   score: 19.0   memory length: 240   epsilon: 1.0
Exploring Available actions  [[3 1]
 [3 4]
 [0 0]]  Selected action  [3 1]
episode: 1   score: 12.0   memory length: 241   epsilon: 1.0
Exploring Available actions  [[3 2]
 [1 3]
 [0 3]
 [4 3]
 [4 2]
 [1 0]
 [4 0]
 [1 4]
 [2 0]
 [2 1]
 [3 0]
 [0 1]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 1   score: -15.0   memory length: 242   epsilon: 1.0
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 0]
episode: 1   score: -20.0   memory length: 243   epsilon: 1.0
Exploring Available actions  [[3 1]
 [0 2]
 [2 0]
 [4 1]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 1   score: -28.0   memory length: 244   epsilon: 1.0
Exploring Available actions  [[2 0]
 [3 2]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 1   score: 

episode: 1   score: -55.0   memory length: 310   epsilon: 1.0
Exploring Available actions  [[0 3]
 [0 2]
 [3 0]
 [0 0]]  Selected action  [0 3]
episode: 1   score: -58.0   memory length: 311   epsilon: 1.0
Exploring Available actions  [[1 2]
 [0 2]
 [2 1]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 1   score: -69.0   memory length: 312   epsilon: 1.0
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 2]
episode: 1   score: -61.0   memory length: 313   epsilon: 1.0
Exploring Available actions  [[0 4]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 1   score: -66.0   memory length: 314   epsilon: 1.0
Exploring Available actions  [[0 1]
 [3 1]
 [0 0]]  Selected action  [0 1]
episode: 1   score: -68.0   memory length: 315   epsilon: 1.0
Exploring Available actions  [[2 1]
 [3 4]
 [3 0]
 [4 2]
 [0 4]
 [1 4]
 [4 0]
 [1 2]
 [4 1]
 [0 1]
 [3 2]
 [2 4]
 [2 0]
 [0 3]
 [3 1]
 [0 0]]  Selected action  [0 1]
episode: 1   score: -97.0   memory length: 316   epsilon: 1.0
Episode : 2

episode: 2   score: -160.0   memory length: 361   epsilon: 0.9968633369849541
Exploring Available actions  [[2 1]
 [1 4]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 2   score: -148.0   memory length: 362   epsilon: 0.9968633369849541
Exploring Available actions  [[4 2]
 [3 2]
 [4 3]
 [0 3]
 [1 3]
 [0 0]]  Selected action  [4 3]
episode: 2   score: -149.0   memory length: 363   epsilon: 0.9968633369849541
Exploring Available actions  [[1 3]
 [3 0]
 [3 4]
 [3 2]
 [0 3]
 [4 2]
 [2 4]
 [4 0]
 [2 3]
 [0 0]]  Selected action  [4 0]
episode: 2   score: -175.0   memory length: 364   epsilon: 0.9968633369849541
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [2 0]
episode: 2   score: -178.0   memory length: 365   epsilon: 0.9968633369849541
Exploring Available actions  [[3 0]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 2   score: -183.0   memory length: 366   epsilon: 0.9968633369849541
Exploring Available actions  [[4 2]
 [0 2]
 [0 0]]  Selected action  [0 0]
episode: 2  

episode: 2   score: -345.0   memory length: 407   epsilon: 0.9968633369849541
Exploring Available actions  [[0 4]
 [4 0]
 [1 2]
 [3 4]
 [2 0]
 [3 1]
 [4 1]
 [4 2]
 [1 4]
 [2 3]
 [0 2]
 [2 4]
 [0 0]]  Selected action  [4 1]
episode: 2   score: -348.0   memory length: 408   epsilon: 0.9968633369849541
Exploring Available actions  [[2 4]
 [1 3]
 [4 1]
 [0 2]
 [4 3]
 [2 0]
 [2 1]
 [2 3]
 [3 2]
 [1 0]
 [1 2]
 [1 4]
 [0 0]]  Selected action  [1 0]
episode: 2   score: -340.0   memory length: 409   epsilon: 0.9968633369849541
Exploring Available actions  [[2 1]
 [2 3]
 [0 1]
 [0 0]]  Selected action  [2 3]
episode: 2   score: -326.0   memory length: 410   epsilon: 0.9968633369849541
Exploring Available actions  [[2 4]
 [0 1]
 [4 2]
 [3 2]
 [0 3]
 [0 4]
 [0 0]]  Selected action  [4 2]
episode: 2   score: -353.0   memory length: 411   epsilon: 0.9968633369849541
Exploring Available actions  [[4 0]
 [3 1]
 [3 4]
 [3 2]
 [0 3]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 2   score: -358.0   mem

episode: 3   score: -51.0   memory length: 452   epsilon: 0.9937365126247782
Exploring Available actions  [[1 2]
 [3 4]
 [1 4]
 [1 0]
 [0 1]
 [0 3]
 [4 3]
 [0 0]]  Selected action  [1 2]
episode: 3   score: -53.0   memory length: 453   epsilon: 0.9937365126247782
Exploring Available actions  [[0 1]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 3   score: -66.0   memory length: 454   epsilon: 0.9937365126247782
Exploring Available actions  [[1 2]
 [3 0]
 [4 1]
 [0 1]
 [3 2]
 [2 0]
 [4 2]
 [0 0]]  Selected action  [4 2]
episode: 3   score: -72.0   memory length: 455   epsilon: 0.9937365126247782
Exploring Available actions  [[2 1]
 [3 1]
 [0 0]]  Selected action  [2 1]
episode: 3   score: -52.0   memory length: 456   epsilon: 0.9937365126247782
Exploring Available actions  [[2 0]
 [0 3]
 [0 4]
 [2 4]
 [2 3]
 [3 1]
 [1 4]
 [4 1]
 [4 0]
 [1 2]
 [0 0]]  Selected action  [4 0]
episode: 3   score: -55.0   memory length: 457   epsilon: 0.9937365126247782
Exploring Available actions  [[4 3]
 

episode: 3   score: -86.0   memory length: 515   epsilon: 0.9937365126247782
Exploring Available actions  [[1 0]
 [0 0]]  Selected action  [0 0]
episode: 3   score: -91.0   memory length: 516   epsilon: 0.9937365126247782
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 0]
episode: 3   score: -96.0   memory length: 517   epsilon: 0.9937365126247782
Exploring Available actions  [[4 2]
 [0 2]
 [0 0]]  Selected action  [4 2]
episode: 3   score: -132.0   memory length: 518   epsilon: 0.9937365126247782
Exploring Available actions  [[0 4]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 3   score: -121.0   memory length: 519   epsilon: 0.9937365126247782
Exploring Available actions  [[0 3]
 [2 1]
 [0 2]
 [1 3]
 [0 4]
 [1 0]
 [4 3]
 [2 4]
 [4 1]
 [0 0]]  Selected action  [2 1]
episode: 3   score: -151.0   memory length: 520   epsilon: 0.9937365126247782
Exploring Available actions  [[0 1]
 [1 4]
 [2 1]
 [4 1]
 [0 2]
 [4 3]
 [2 4]
 [3 4]
 [3 0]
 [1 2]
 [0 0]]  Selected action  

episode: 3   score: -63.0   memory length: 563   epsilon: 0.9937365126247782
Exploring Available actions  [[2 1]
 [3 2]
 [2 4]
 [3 1]
 [4 0]
 [2 3]
 [4 2]
 [0 0]]  Selected action  [3 1]
episode: 3   score: -65.0   memory length: 564   epsilon: 0.9937365126247782
Exploring Available actions  [[2 0]
 [0 3]
 [4 2]
 [0 2]
 [3 1]
 [1 0]
 [0 4]
 [1 4]
 [2 3]
 [0 0]]  Selected action  [4 2]
episode: 3   score: -57.0   memory length: 565   epsilon: 0.9937365126247782
Exploring Available actions  [[0 1]
 [1 2]
 [4 0]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 3   score: -62.0   memory length: 566   epsilon: 0.9937365126247782
Exploring Available actions  [[1 3]
 [2 3]
 [0 0]]  Selected action  [1 3]
episode: 3   score: -67.0   memory length: 567   epsilon: 0.9937365126247782
Exploring Available actions  [[2 4]
 [3 2]
 [3 1]
 [2 0]
 [3 4]
 [4 0]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 3   score: -72.0   memory length: 568   epsilon: 0.9937365126247782
Exploring Available actions  [

episode: 4   score: -2.0   memory length: 623   epsilon: 0.9906194960589274
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [0 0]
episode: 4   score: -7.0   memory length: 624   epsilon: 0.9906194960589274
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [3 2]
episode: 4   score: 4.0   memory length: 625   epsilon: 0.9906194960589274
Exploring Available actions  [[0 4]
 [1 0]
 [1 3]
 [3 2]
 [4 2]
 [0 0]]  Selected action  [3 2]
episode: 4   score: 1.0   memory length: 626   epsilon: 0.9906194960589274
Exploring Available actions  [[0 4]
 [0 3]
 [0 0]]  Selected action  [0 4]
episode: 4   score: -25.0   memory length: 627   epsilon: 0.9906194960589274
Exploring Available actions  [[4 3]
 [2 0]
 [1 2]
 [0 2]
 [4 2]
 [0 3]
 [3 0]
 [1 0]
 [0 0]]  Selected action  [4 2]
episode: 4   score: -17.0   memory length: 628   epsilon: 0.9906194960589274
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [2 4]
episode: 4   score: -9.0   memory length: 629   epsi

episode: 4   score: -118.0   memory length: 671   epsilon: 0.9906194960589274
Exploring Available actions  [[0 2]
 [3 4]
 [2 1]
 [1 3]
 [1 2]
 [1 4]
 [4 1]
 [2 0]
 [0 4]
 [0 0]]  Selected action  [2 1]
episode: 4   score: -102.0   memory length: 672   epsilon: 0.9906194960589274
Exploring Available actions  [[4 0]
 [2 3]
 [0 4]
 [2 0]
 [3 2]
 [2 4]
 [0 3]
 [3 0]
 [3 4]
 [0 0]]  Selected action  [2 0]
episode: 4   score: -100.0   memory length: 673   epsilon: 0.9906194960589274
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [0 0]
episode: 4   score: -105.0   memory length: 674   epsilon: 0.9906194960589274
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [3 2]
episode: 4   score: -112.0   memory length: 675   epsilon: 0.9906194960589274
Exploring Available actions  [[0 1]
 [0 2]
 [1 0]
 [4 3]
 [0 0]]  Selected action  [1 0]
episode: 4   score: -116.0   memory length: 676   epsilon: 0.9906194960589274
Exploring Available actions  [[4 1]
 [4 0]
 [0 0]]  Selecte

episode: 4   score: -76.0   memory length: 731   epsilon: 0.9906194960589274
Exploring Available actions  [[0 3]
 [3 2]
 [3 1]
 [0 2]
 [3 4]
 [2 4]
 [2 3]
 [0 1]
 [1 0]
 [4 1]
 [0 0]]  Selected action  [1 0]
episode: 4   score: -96.0   memory length: 732   epsilon: 0.9906194960589274
Exploring Available actions  [[1 2]
 [0 0]]  Selected action  [1 2]
episode: 4   score: -60.0   memory length: 733   epsilon: 0.9906194960589274
Exploring Available actions  [[4 3]
 [0 2]
 [2 1]
 [3 4]
 [0 0]]  Selected action  [0 2]
episode: 4   score: -64.0   memory length: 734   epsilon: 0.9906194960589274
Exploring Available actions  [[0 1]
 [2 4]
 [4 0]
 [1 3]
 [0 0]]  Selected action  [0 1]
episode: 4   score: -65.0   memory length: 735   epsilon: 0.9906194960589274
Exploring Available actions  [[3 2]
 [0 2]
 [0 3]
 [2 1]
 [2 0]
 [0 1]
 [2 4]
 [3 0]
 [1 4]
 [2 3]
 [3 1]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [2 0]
episode: 4   score: -73.0   memory length: 736   epsilon: 0.9906194960589274
Exploring

episode: 5   score: 48.0   memory length: 779   epsilon: 0.987512256523656
Exploring Available actions  [[0 4]
 [3 1]
 [4 3]
 [0 2]
 [0 0]]  Selected action  [4 3]
episode: 5   score: 55.0   memory length: 780   epsilon: 0.987512256523656
Exploring Available actions  [[2 4]
 [3 0]
 [0 3]
 [3 2]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 5   score: 52.0   memory length: 781   epsilon: 0.987512256523656
Exploring Available actions  [[0 2]
 [3 4]
 [0 1]
 [4 3]
 [2 1]
 [2 3]
 [0 4]
 [0 0]]  Selected action  [0 2]
episode: 5   score: 50.0   memory length: 782   epsilon: 0.987512256523656
Exploring Available actions  [[1 0]
 [1 4]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 5   score: 45.0   memory length: 783   epsilon: 0.987512256523656
Exploring Available actions  [[4 2]
 [3 2]
 [2 1]
 [4 3]
 [1 4]
 [0 1]
 [0 0]]  Selected action  [3 2]
episode: 5   score: 23.0   memory length: 784   epsilon: 0.987512256523656
Exploring Available actions  [[3 2]
 [0 4]
 [4 3]
 [0 0]]  Selected ac

episode: 5   score: 35.0   memory length: 839   epsilon: 0.987512256523656
Exploring Available actions  [[3 4]
 [2 1]
 [3 2]
 [1 3]
 [0 0]]  Selected action  [2 1]
episode: 5   score: 67.0   memory length: 840   epsilon: 0.987512256523656
Exploring Available actions  [[2 1]
 [4 2]
 [2 3]
 [2 0]
 [3 0]
 [4 0]
 [0 4]
 [1 0]
 [4 1]
 [1 4]
 [3 4]
 [0 3]
 [2 4]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 5   score: 95.0   memory length: 841   epsilon: 0.987512256523656
Exploring Available actions  [[0 2]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 5   score: 90.0   memory length: 842   epsilon: 0.987512256523656
Exploring Available actions  [[1 0]
 [0 2]
 [4 0]
 [4 2]
 [0 4]
 [0 3]
 [0 1]
 [1 3]
 [0 0]]  Selected action  [0 4]
episode: 5   score: 91.0   memory length: 843   epsilon: 0.987512256523656
Exploring Available actions  [[0 3]
 [1 4]
 [1 2]
 [3 1]
 [3 2]
 [2 1]
 [4 1]
 [2 0]
 [0 1]
 [0 0]]  Selected action  [3 2]
episode: 5   score: 95.0   memory length: 844   epsilon: 0.98

episode: 6   score: -99.0   memory length: 893   epsilon: 0.9844147633517137
Exploring Available actions  [[4 2]
 [1 4]
 [2 0]
 [3 2]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 6   score: -104.0   memory length: 894   epsilon: 0.9844147633517137
Exploring Available actions  [[1 2]
 [0 2]
 [1 4]
 [0 3]
 [4 0]
 [2 3]
 [4 1]
 [3 2]
 [1 0]
 [0 0]]  Selected action  [3 2]
episode: 6   score: -92.0   memory length: 895   epsilon: 0.9844147633517137
Exploring Available actions  [[1 3]
 [2 4]
 [1 4]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 6   score: -95.0   memory length: 896   epsilon: 0.9844147633517137
Exploring Available actions  [[2 4]
 [1 2]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 6   score: -100.0   memory length: 897   epsilon: 0.9844147633517137
Exploring Available actions  [[1 4]
 [1 2]
 [2 0]
 [0 0]]  Selected action  [2 0]
episode: 6   score: -102.0   memory length: 898   epsilon: 0.9844147633517137
Exploring Available actions  [[3 2]
 [0 3]
 [0 2]
 [0 0]]  Sel

episode: 6   score: -38.0   memory length: 946   epsilon: 0.9844147633517137
Exploring Available actions  [[0 3]
 [2 3]
 [4 1]
 [3 2]
 [0 0]]  Selected action  [2 3]
episode: 6   score: -30.0   memory length: 947   epsilon: 0.9844147633517137
Exploring Available actions  [[3 4]
 [2 0]
 [4 0]
 [1 3]
 [0 1]
 [1 4]
 [0 4]
 [3 0]
 [4 3]
 [0 0]]  Selected action  [4 0]
episode: 6   score: -41.0   memory length: 948   epsilon: 0.9844147633517137
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [0 0]
episode: 6   score: -46.0   memory length: 949   epsilon: 0.9844147633517137
Exploring Available actions  [[0 3]
 [1 0]
 [4 0]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 6   score: -51.0   memory length: 950   epsilon: 0.9844147633517137
Exploring Available actions  [[1 3]
 [2 0]
 [3 4]
 [0 0]]  Selected action  [0 0]
episode: 6   score: -56.0   memory length: 951   epsilon: 0.9844147633517137
Exploring Available actions  [[0 1]
 [0 2]
 [1 0]
 [1 4]
 [0 0]]  Selected action  [1 

episode: 6   score: -117.0   memory length: 996   epsilon: 0.9844147633517137
Exploring Available actions  [[2 4]
 [4 2]
 [1 0]
 [0 4]
 [4 1]
 [2 3]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 6   score: -109.0   memory length: 997   epsilon: 0.9844147633517137
Exploring Available actions  [[3 2]
 [4 0]
 [4 2]
 [4 3]
 [3 0]
 [1 4]
 [0 1]
 [0 2]
 [2 4]
 [1 0]
 [2 3]
 [2 0]
 [3 4]
 [1 2]
 [0 0]]  Selected action  [0 1]
episode: 6   score: -115.0   memory length: 998   epsilon: 0.9844147633517137
Exploring Available actions  [[3 4]
 [2 4]
 [3 1]
 [0 1]
 [1 4]
 [4 1]
 [4 0]
 [1 2]
 [2 3]
 [1 3]
 [4 2]
 [0 4]
 [3 0]
 [0 2]
 [2 1]
 [0 0]]  Selected action  [2 4]
episode: 6   score: -130.0   memory length: 999   epsilon: 0.9844147633517137
Exploring Available actions  [[4 2]
 [0 2]
 [3 1]
 [1 0]
 [0 4]
 [2 4]
 [1 4]
 [2 1]
 [4 0]
 [0 0]]  Selected action  [2 4]
episode: 6   score: -131.0   memory length: 1000   epsilon: 0.9844147633517137
Exploring Available actions  [[3 2]
 [3 0]
 [1 3]


episode: 7   score: -44.0   memory length: 1044   epsilon: 0.9813269859720434
Exploring Available actions  [[3 4]
 [3 1]
 [2 1]
 [2 3]
 [4 1]
 [3 0]
 [4 3]
 [0 1]
 [1 2]
 [2 4]
 [2 0]
 [0 0]]  Selected action  [2 1]
episode: 7   score: -75.0   memory length: 1045   epsilon: 0.9813269859720434
Exploring Available actions  [[4 2]
 [4 0]
 [3 2]
 [3 0]
 [2 4]
 [4 1]
 [2 0]
 [2 1]
 [0 1]
 [2 3]
 [4 3]
 [1 0]
 [3 4]
 [0 4]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 7   score: -59.0   memory length: 1046   epsilon: 0.9813269859720434
Exploring Available actions  [[4 0]
 [0 1]
 [4 2]
 [4 1]
 [2 3]
 [0 0]]  Selected action  [0 1]
episode: 7   score: -50.0   memory length: 1047   epsilon: 0.9813269859720434
Exploring Available actions  [[0 4]
 [4 3]
 [3 2]
 [4 0]
 [1 0]
 [1 4]
 [2 4]
 [4 2]
 [2 1]
 [1 3]
 [3 0]
 [0 1]
 [3 4]
 [3 1]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 7   score: -55.0   memory length: 1048   epsilon: 0.9813269859720434
Exploring Available actions  [[4 1]
 [4 3]
 

episode: 7   score: -112.0   memory length: 1094   epsilon: 0.9813269859720434
Exploring Available actions  [[2 0]
 [1 2]
 [0 2]
 [0 1]
 [1 0]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 7   score: -117.0   memory length: 1095   epsilon: 0.9813269859720434
Exploring Available actions  [[3 2]
 [4 3]
 [0 1]
 [0 0]]  Selected action  [4 3]
episode: 7   score: -118.0   memory length: 1096   epsilon: 0.9813269859720434
Exploring Available actions  [[4 3]
 [4 1]
 [2 0]
 [1 0]
 [1 2]
 [3 4]
 [0 3]
 [1 3]
 [2 4]
 [0 2]
 [0 0]]  Selected action  [2 0]
episode: 7   score: -113.0   memory length: 1097   epsilon: 0.9813269859720434
Exploring Available actions  [[2 3]
 [0 1]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 7   score: -89.0   memory length: 1098   epsilon: 0.9813269859720434
Exploring Available actions  [[0 1]
 [4 2]
 [0 4]
 [3 4]
 [1 0]
 [0 0]]  Selected action  [0 4]
episode: 7   score: -90.0   memory length: 1099   epsilon: 0.9813269859720434
Exploring Available actions  [[4 0

episode: 7   score: -254.0   memory length: 1144   epsilon: 0.9813269859720434
Exploring Available actions  [[0 3]
 [3 1]
 [1 3]
 [3 2]
 [1 4]
 [0 0]]  Selected action  [1 3]
episode: 7   score: -255.0   memory length: 1145   epsilon: 0.9813269859720434
Exploring Available actions  [[2 4]
 [1 2]
 [3 4]
 [0 0]]  Selected action  [1 2]
episode: 7   score: -256.0   memory length: 1146   epsilon: 0.9813269859720434
Exploring Available actions  [[2 4]
 [1 3]
 [2 1]
 [4 1]
 [4 0]
 [0 0]]  Selected action  [2 4]
episode: 7   score: -256.0   memory length: 1147   epsilon: 0.9813269859720434
Exploring Available actions  [[4 3]
 [4 2]
 [0 2]
 [4 0]
 [0 4]
 [2 4]
 [0 3]
 [2 3]
 [3 0]
 [3 1]
 [2 1]
 [3 2]
 [0 0]]  Selected action  [4 3]
episode: 7   score: -240.0   memory length: 1148   epsilon: 0.9813269859720434
Exploring Available actions  [[3 2]
 [0 2]
 [3 4]
 [1 3]
 [0 1]
 [3 1]
 [4 0]
 [2 1]
 [2 4]
 [2 3]
 [2 0]
 [4 1]
 [3 0]
 [0 4]
 [1 4]
 [0 0]]  Selected action  [2 4]
episode: 7   score: 

episode: 8   score: 4.0   memory length: 1194   epsilon: 0.9782488939094783
Exploring Available actions  [[2 3]
 [3 4]
 [4 2]
 [0 4]
 [0 1]
 [2 1]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 8   score: -1.0   memory length: 1195   epsilon: 0.9782488939094783
Exploring Available actions  [[0 3]
 [2 4]
 [4 0]
 [2 3]
 [1 4]
 [0 4]
 [2 1]
 [0 0]]  Selected action  [1 4]
episode: 8   score: 2.0   memory length: 1196   epsilon: 0.9782488939094783
Exploring Available actions  [[4 1]
 [1 0]
 [3 4]
 [4 0]
 [2 3]
 [1 3]
 [4 3]
 [2 0]
 [3 1]
 [0 0]]  Selected action  [0 0]
episode: 8   score: -3.0   memory length: 1197   epsilon: 0.9782488939094783
Exploring Available actions  [[2 4]
 [1 0]
 [1 2]
 [4 1]
 [0 1]
 [3 0]
 [0 4]
 [1 4]
 [0 0]]  Selected action  [1 2]
episode: 8   score: 27.0   memory length: 1198   epsilon: 0.9782488939094783
Exploring Available actions  [[3 1]
 [4 2]
 [2 3]
 [3 2]
 [2 1]
 [0 0]]  Selected action  [3 2]
episode: 8   score: 4.0   memory length: 1199   epsilon: 0.9

episode: 8   score: -186.0   memory length: 1243   epsilon: 0.9782488939094783
Exploring Available actions  [[2 3]
 [0 1]
 [4 0]
 [0 2]
 [4 3]
 [3 0]
 [0 0]]  Selected action  [2 3]
episode: 8   score: -204.0   memory length: 1244   epsilon: 0.9782488939094783
Exploring Available actions  [[3 4]
 [2 1]
 [0 1]
 [3 0]
 [4 3]
 [2 4]
 [4 1]
 [0 4]
 [3 1]
 [1 2]
 [0 0]]  Selected action  [3 4]
episode: 8   score: -184.0   memory length: 1245   epsilon: 0.9782488939094783
Exploring Available actions  [[0 1]
 [4 1]
 [0 4]
 [3 0]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 8   score: -144.0   memory length: 1246   epsilon: 0.9782488939094783
Exploring Available actions  [[1 4]
 [3 0]
 [3 1]
 [2 3]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 8   score: -173.0   memory length: 1247   epsilon: 0.9782488939094783
Exploring Available actions  [[4 2]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 8   score: -178.0   memory length: 1248   epsilon: 0.9782488939094783
Exploring Available actio

episode: 8   score: -262.0   memory length: 1293   epsilon: 0.9782488939094783
Exploring Available actions  [[4 2]
 [4 3]
 [2 3]
 [0 2]
 [0 0]]  Selected action  [2 3]
episode: 8   score: -268.0   memory length: 1294   epsilon: 0.9782488939094783
Exploring Available actions  [[0 2]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 8   score: -270.0   memory length: 1295   epsilon: 0.9782488939094783
Exploring Available actions  [[3 2]
 [4 1]
 [1 0]
 [0 0]]  Selected action  [4 1]
episode: 8   score: -305.0   memory length: 1296   epsilon: 0.9782488939094783
Exploring Available actions  [[1 2]
 [2 0]
 [0 2]
 [4 1]
 [3 0]
 [4 0]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [4 0]
episode: 8   score: -301.0   memory length: 1297   epsilon: 0.9782488939094783
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [0 0]
episode: 8   score: -306.0   memory length: 1298   epsilon: 0.9782488939094783
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [2 4]
episode: 8   score: -308.0 

episode: 9   score: 8.0   memory length: 1347   epsilon: 0.975180456784443
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 9   score: 3.0   memory length: 1348   epsilon: 0.975180456784443
Exploring Available actions  [[4 0]
 [1 0]
 [0 1]
 [0 0]]  Selected action  [4 0]
episode: 9   score: -23.0   memory length: 1349   epsilon: 0.975180456784443
Exploring Available actions  [[1 0]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 9   score: -54.0   memory length: 1350   epsilon: 0.975180456784443
Exploring Available actions  [[0 1]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 9   score: -59.0   memory length: 1351   epsilon: 0.975180456784443
Exploring Available actions  [[4 1]
 [0 4]
 [0 0]]  Selected action  [0 0]
episode: 9   score: -64.0   memory length: 1352   epsilon: 0.975180456784443
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [0 0]
episode: 9   score: -69.0   memory length: 1353   epsilon: 0.975180456784443
Exploring Available ac

episode: 9   score: -161.0   memory length: 1401   epsilon: 0.975180456784443
Exploring Available actions  [[1 0]
 [0 4]
 [0 2]
 [4 2]
 [0 0]]  Selected action  [1 0]
episode: 9   score: -143.0   memory length: 1402   epsilon: 0.975180456784443
Exploring Available actions  [[3 4]
 [3 1]
 [1 4]
 [1 3]
 [0 2]
 [0 0]]  Selected action  [3 4]
episode: 9   score: -140.0   memory length: 1403   epsilon: 0.975180456784443
Exploring Available actions  [[3 0]
 [0 2]
 [0 1]
 [1 2]
 [4 1]
 [3 1]
 [2 4]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 9   score: -144.0   memory length: 1404   epsilon: 0.975180456784443
Exploring Available actions  [[1 3]
 [0 4]
 [3 2]
 [4 0]
 [0 3]
 [3 4]
 [1 4]
 [4 2]
 [3 0]
 [2 0]
 [4 1]
 [0 2]
 [3 1]
 [0 0]]  Selected action  [1 4]
episode: 9   score: -179.0   memory length: 1405   epsilon: 0.975180456784443
Exploring Available actions  [[2 3]
 [0 2]
 [3 4]
 [4 2]
 [3 1]
 [1 4]
 [1 2]
 [2 1]
 [1 3]
 [0 3]
 [0 0]]  Selected action  [1 4]
episode: 9   score: -179.

episode: 9   score: -30.0   memory length: 1454   epsilon: 0.975180456784443
(1, 21) [0.        0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        2.6508121 0.
 0.        0.       ] Index  12 2.6508121
Exploiting Available actions  [[3 1]
 [3 2]
 [2 3]
 [0 1]
 [3 4]
 [0 4]
 [4 1]
 [3 0]
 [1 0]
 [4 3]
 [1 4]
 [4 2]
 [4 0]
 [0 3]
 [2 4]
 [0 0]]  Selected action  [4 0]
episode: 9   score: -55.0   memory length: 1455   epsilon: 0.975180456784443
Exploring Available actions  [[4 3]
 [0 0]]  Selected action  [0 0]
episode: 9   score: -60.0   memory length: 1456   epsilon: 0.975180456784443
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 9   score: -65.0   memory length: 1457   epsilon: 0.975180456784443
Exploring Available actions  [[1 3]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 9   score: -61.0   memory length: 1458   epsilon: 0.975180456784443
Exploring Available actions  [[3 4]
 [2 1]
 [3 2]
 [2 0]
 [

episode: 10   score: 1.0   memory length: 1500   epsilon: 0.9721216443126517
Exploring Available actions  [[0 4]
 [1 4]
 [2 3]
 [1 0]
 [4 2]
 [0 2]
 [0 1]
 [4 1]
 [0 0]]  Selected action  [1 4]
episode: 10   score: 1.0   memory length: 1501   epsilon: 0.9721216443126517
Exploring Available actions  [[2 1]
 [1 2]
 [0 3]
 [0 4]
 [2 4]
 [2 3]
 [1 4]
 [0 0]]  Selected action  [2 1]
episode: 10   score: 11.0   memory length: 1502   epsilon: 0.9721216443126517
Exploring Available actions  [[3 0]
 [3 1]
 [4 2]
 [2 3]
 [0 3]
 [1 3]
 [2 1]
 [4 3]
 [0 0]]  Selected action  [1 3]
episode: 10   score: 19.0   memory length: 1503   epsilon: 0.9721216443126517
Exploring Available actions  [[3 1]
 [4 0]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 10   score: 31.0   memory length: 1504   epsilon: 0.9721216443126517
Exploring Available actions  [[0 1]
 [2 3]
 [1 4]
 [1 3]
 [0 0]]  Selected action  [1 4]
episode: 10   score: 11.0   memory length: 1505   epsilon: 0.9721216443126517
Exploring Available

episode: 10   score: -127.0   memory length: 1553   epsilon: 0.9721216443126517
Exploring Available actions  [[1 2]
 [2 0]
 [0 0]]  Selected action  [2 0]
episode: 10   score: -103.0   memory length: 1554   epsilon: 0.9721216443126517
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 3]
episode: 10   score: -103.0   memory length: 1555   epsilon: 0.9721216443126517
Exploring Available actions  [[3 2]
 [1 4]
 [1 2]
 [2 0]
 [0 2]
 [4 3]
 [1 0]
 [0 3]
 [2 4]
 [0 0]]  Selected action  [1 0]
episode: 10   score: -95.0   memory length: 1556   epsilon: 0.9721216443126517
Exploring Available actions  [[0 1]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 10   score: -100.0   memory length: 1557   epsilon: 0.9721216443126517
Exploring Available actions  [[2 3]
 [0 0]]  Selected action  [0 0]
episode: 10   score: -105.0   memory length: 1558   epsilon: 0.9721216443126517
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [2 0]
episode: 10   score: -100.0   memory length

episode: 10   score: -298.0   memory length: 1606   epsilon: 0.9721216443126517
Exploring Available actions  [[4 2]
 [2 3]
 [0 2]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 10   score: -312.0   memory length: 1607   epsilon: 0.9721216443126517
Exploring Available actions  [[2 0]
 [3 0]
 [2 1]
 [2 3]
 [3 2]
 [4 1]
 [0 0]]  Selected action  [3 0]
episode: 10   score: -305.0   memory length: 1608   epsilon: 0.9721216443126517
Exploring Available actions  [[2 3]
 [3 1]
 [4 2]
 [0 0]]  Selected action  [4 2]
episode: 10   score: -306.0   memory length: 1609   epsilon: 0.9721216443126517
Exploring Available actions  [[0 1]
 [3 2]
 [0 0]]  Selected action  [0 1]
episode: 10   score: -293.0   memory length: 1610   epsilon: 0.9721216443126517
Exploring Available actions  [[4 0]
 [1 2]
 [0 1]
 [1 0]
 [3 4]
 [2 1]
 [0 4]
 [2 4]
 [0 0]]  Selected action  [0 4]
episode: 10   score: -292.0   memory length: 1611   epsilon: 0.9721216443126517
Exploring Available actions  [[0 1]
 [1 4]
 [0 4]
 [3 

episode: 11   score: -72.0   memory length: 1653   epsilon: 0.9690724263048106
Exploring Available actions  [[2 4]
 [1 3]
 [3 4]
 [4 0]
 [3 1]
 [4 1]
 [3 2]
 [1 4]
 [0 4]
 [0 0]]  Selected action  [1 4]
episode: 11   score: -72.0   memory length: 1654   epsilon: 0.9690724263048106
Exploring Available actions  [[3 1]
 [2 4]
 [2 3]
 [1 0]
 [3 4]
 [1 4]
 [4 2]
 [0 4]
 [1 3]
 [3 0]
 [0 0]]  Selected action  [0 4]
episode: 11   score: -73.0   memory length: 1655   epsilon: 0.9690724263048106
Exploring Available actions  [[0 1]
 [3 4]
 [4 0]
 [3 1]
 [1 0]
 [2 3]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 11   score: -78.0   memory length: 1656   epsilon: 0.9690724263048106
Exploring Available actions  [[3 2]
 [2 4]
 [4 2]
 [3 0]
 [0 3]
 [0 0]]  Selected action  [3 2]
episode: 11   score: -92.0   memory length: 1657   epsilon: 0.9690724263048106
Exploring Available actions  [[0 2]
 [2 1]
 [1 2]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 11   score: -97.0   memory length: 1658   epsi

episode: 11   score: -286.0   memory length: 1706   epsilon: 0.9690724263048106
Exploring Available actions  [[0 4]
 [1 3]
 [3 1]
 [0 2]
 [2 3]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 11   score: -289.0   memory length: 1707   epsilon: 0.9690724263048106
Exploring Available actions  [[3 2]
 [0 4]
 [1 0]
 [4 1]
 [2 1]
 [0 1]
 [3 4]
 [3 1]
 [1 2]
 [2 4]
 [0 0]]  Selected action  [2 1]
episode: 11   score: -274.0   memory length: 1708   epsilon: 0.9690724263048106
Exploring Available actions  [[3 0]
 [3 1]
 [2 4]
 [2 0]
 [3 2]
 [0 2]
 [4 1]
 [0 1]
 [0 0]]  Selected action  [2 4]
episode: 11   score: -316.0   memory length: 1709   epsilon: 0.9690724263048106
Exploring Available actions  [[1 3]
 [3 0]
 [1 0]
 [3 1]
 [2 0]
 [3 4]
 [0 1]
 [0 0]]  Selected action  [3 1]
episode: 11   score: -337.0   memory length: 1710   epsilon: 0.9690724263048106
Exploring Available actions  [[0 4]
 [4 3]
 [2 3]
 [0 2]
 [2 1]
 [3 0]
 [4 2]
 [3 1]
 [1 4]
 [1 0]
 [0 1]
 [2 0]
 [4 1]
 [0 0]]  Selected a

episode: 11   score: -464.0   memory length: 1759   epsilon: 0.9690724263048106
Exploring Available actions  [[1 3]
 [3 2]
 [3 4]
 [0 3]
 [3 1]
 [0 0]]  Selected action  [3 4]
episode: 11   score: -440.0   memory length: 1760   epsilon: 0.9690724263048106
Exploring Available actions  [[4 3]
 [1 3]
 [3 2]
 [3 4]
 [1 4]
 [2 4]
 [4 1]
 [3 1]
 [4 2]
 [1 0]
 [4 0]
 [3 0]
 [0 0]]  Selected action  [4 1]
episode: 11   score: -432.0   memory length: 1761   epsilon: 0.9690724263048106
Exploring Available actions  [[1 0]
 [1 2]
 [0 4]
 [3 1]
 [4 3]
 [2 0]
 [3 0]
 [2 1]
 [0 0]]  Selected action  [3 1]
episode: 11   score: -436.0   memory length: 1762   epsilon: 0.9690724263048106
Exploring Available actions  [[4 1]
 [0 1]
 [1 2]
 [4 3]
 [1 0]
 [2 0]
 [3 4]
 [4 0]
 [0 3]
 [1 4]
 [1 3]
 [3 2]
 [2 4]
 [3 1]
 [2 3]
 [0 0]]  Selected action  [2 0]
episode: 11   score: -444.0   memory length: 1763   epsilon: 0.9690724263048106
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 0]
episode:

episode: 12   score: -18.0   memory length: 1802   epsilon: 0.9660327726663196
Exploring Available actions  [[0 4]
 [3 4]
 [1 2]
 [3 1]
 [4 0]
 [4 1]
 [0 0]]  Selected action  [3 1]
episode: 12   score: -12.0   memory length: 1803   epsilon: 0.9660327726663196
Exploring Available actions  [[0 3]
 [1 4]
 [1 0]
 [2 3]
 [3 1]
 [4 1]
 [4 2]
 [3 2]
 [3 0]
 [0 4]
 [2 4]
 [1 3]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 12   score: 16.0   memory length: 1804   epsilon: 0.9660327726663196
Exploring Available actions  [[0 4]
 [3 4]
 [1 4]
 [4 1]
 [3 0]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [2 1]
episode: 12   score: 24.0   memory length: 1805   epsilon: 0.9660327726663196
Exploring Available actions  [[1 3]
 [4 0]
 [1 4]
 [4 2]
 [3 0]
 [2 3]
 [2 4]
 [2 0]
 [0 1]
 [0 4]
 [3 1]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 12   score: 32.0   memory length: 1806   epsilon: 0.9660327726663196
Exploring Available actions  [[0 2]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 12   score: 27

episode: 12   score: -32.0   memory length: 1854   epsilon: 0.9660327726663196
Exploring Available actions  [[2 4]
 [1 0]
 [0 4]
 [3 0]
 [0 2]
 [2 3]
 [4 3]
 [3 4]
 [1 3]
 [1 2]
 [1 4]
 [2 0]
 [0 0]]  Selected action  [2 4]
episode: 12   score: -32.0   memory length: 1855   epsilon: 0.9660327726663196
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [4 3]
 [1 2]
 [4 0]
 [1 0]
 [0 1]
 [1 4]
 [2 4]
 [0 0]]  Selected action  [4 2]
episode: 12   score: -32.0   memory length: 1856   epsilon: 0.9660327726663196
Exploring Available actions  [[0 1]
 [3 4]
 [4 2]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [0 1]
episode: 12   score: -29.0   memory length: 1857   epsilon: 0.9660327726663196
Exploring Available actions  [[4 3]
 [3 4]
 [2 4]
 [4 1]
 [3 2]
 [3 1]
 [1 3]
 [3 0]
 [0 1]
 [0 4]
 [2 0]
 [1 0]
 [0 3]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 12   score: -34.0   memory length: 1858   epsilon: 0.9660327726663196
Exploring Available actions  [[4 0]
 

episode: 12   score: 92.0   memory length: 1904   epsilon: 0.9660327726663196
Exploring Available actions  [[4 3]
 [3 1]
 [0 1]
 [3 2]
 [1 2]
 [2 0]
 [2 4]
 [3 4]
 [0 0]]  Selected action  [0 0]
episode: 12   score: 87.0   memory length: 1905   epsilon: 0.9660327726663196
Exploring Available actions  [[3 4]
 [1 2]
 [1 3]
 [2 3]
 [3 1]
 [4 2]
 [0 4]
 [2 1]
 [4 3]
 [0 0]]  Selected action  [0 4]
episode: 12   score: 56.0   memory length: 1906   epsilon: 0.9660327726663196
Exploring Available actions  [[3 4]
 [2 0]
 [2 3]
 [0 1]
 [1 0]
 [0 2]
 [4 0]
 [3 0]
 [0 0]]  Selected action  [4 0]
episode: 12   score: 60.0   memory length: 1907   epsilon: 0.9660327726663196
Episode : 13
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [0 0]
episode: 13   score: -5   memory length: 1908   epsilon: 0.9630026533969749
Exploring Available actions  [[0 4]
 [1 2]
 [2 3]
 [3 4]
 [1 3]
 [4 2]
 [0 2]
 [0 0]]  Selected action  [1 3]
episode: 13   score: -7.0   memory length: 1909   epsilon: 0.96

episode: 13   score: -1.0   memory length: 1955   epsilon: 0.9630026533969749
Exploring Available actions  [[1 0]
 [0 4]
 [4 2]
 [1 4]
 [2 4]
 [4 3]
 [3 4]
 [2 0]
 [4 0]
 [3 1]
 [0 0]]  Selected action  [0 4]
episode: 13   score: 21.0   memory length: 1956   epsilon: 0.9630026533969749
Exploring Available actions  [[0 1]
 [0 4]
 [0 2]
 [2 0]
 [1 2]
 [4 3]
 [3 2]
 [1 4]
 [4 0]
 [0 0]]  Selected action  [0 4]
episode: 13   score: 20.0   memory length: 1957   epsilon: 0.9630026533969749
Exploring Available actions  [[1 2]
 [4 0]
 [4 1]
 [0 1]
 [2 3]
 [2 1]
 [0 4]
 [0 0]]  Selected action  [4 1]
episode: 13   score: 32.0   memory length: 1958   epsilon: 0.9630026533969749
Exploring Available actions  [[0 4]
 [1 2]
 [3 4]
 [1 4]
 [3 0]
 [4 2]
 [1 0]
 [4 1]
 [3 1]
 [4 0]
 [0 3]
 [1 3]
 [3 2]
 [4 3]
 [2 3]
 [0 0]]  Selected action  [0 3]
episode: 13   score: -13.0   memory length: 1959   epsilon: 0.9630026533969749
Exploring Available actions  [[0 1]
 [3 4]
 [3 1]
 [1 3]
 [0 4]
 [0 3]
 [4 2]


episode: 13   score: -32.0   memory length: 2010   epsilon: 0.9630026533969749
Exploring Available actions  [[2 1]
 [3 4]
 [3 0]
 [4 0]
 [1 3]
 [1 4]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 13   score: -15.0   memory length: 2011   epsilon: 0.9630026533969749
Exploring Available actions  [[3 0]
 [0 4]
 [2 3]
 [2 4]
 [3 1]
 [0 1]
 [0 3]
 [1 0]
 [0 0]]  Selected action  [0 4]
episode: 13   score: -18.0   memory length: 2012   epsilon: 0.9630026533969749
Exploring Available actions  [[3 1]
 [0 3]
 [4 0]
 [2 4]
 [2 0]
 [4 2]
 [1 0]
 [0 2]
 [2 1]
 [3 4]
 [1 3]
 [3 2]
 [0 1]
 [0 4]
 [0 0]]  Selected action  [0 1]
episode: 13   score: -23.0   memory length: 2013   epsilon: 0.9630026533969749
Exploring Available actions  [[2 4]
 [4 1]
 [3 4]
 [0 2]
 [2 0]
 [3 2]
 [1 2]
 [1 3]
 [3 1]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 13   score: -19.0   memory length: 2014   epsilon: 0.9630026533969749
Exploring Available actions  [[4 1]
 [0 0]]  Selected action  [4 1]
episode: 13   score:

episode: 13   score: 18.0   memory length: 2062   epsilon: 0.9630026533969749
Exploring Available actions  [[0 4]
 [1 0]
 [3 4]
 [4 3]
 [2 0]
 [2 3]
 [0 1]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 13   score: 16.0   memory length: 2063   epsilon: 0.9630026533969749
Exploring Available actions  [[4 1]
 [0 3]
 [2 1]
 [3 0]
 [1 3]
 [0 4]
 [0 1]
 [4 2]
 [1 2]
 [0 0]]  Selected action  [3 0]
episode: 13   score: 28.0   memory length: 2064   epsilon: 0.9630026533969749
Exploring Available actions  [[1 0]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 13   score: 37.0   memory length: 2065   epsilon: 0.9630026533969749
Episode : 14
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [3 1]
episode: 14   score: -26.0   memory length: 2066   epsilon: 0.9599820385906737
Exploring Available actions  [[1 4]
 [0 3]
 [3 0]
 [0 2]
 [4 2]
 [4 3]
 [1 3]
 [2 3]
 [2 0]
 [2 4]
 [1 0]
 [0 1]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 14   score: -29.0   memory length: 2067   epsilon: 

episode: 14   score: -245.0   memory length: 2116   epsilon: 0.9599820385906737
Exploring Available actions  [[2 1]
 [1 4]
 [3 2]
 [4 0]
 [0 3]
 [4 2]
 [2 3]
 [0 0]]  Selected action  [2 1]
episode: 14   score: -235.0   memory length: 2117   epsilon: 0.9599820385906737
Exploring Available actions  [[3 2]
 [1 4]
 [2 0]
 [4 2]
 [4 1]
 [1 2]
 [3 1]
 [1 3]
 [0 0]]  Selected action  [3 2]
episode: 14   score: -228.0   memory length: 2118   epsilon: 0.9599820385906737
Exploring Available actions  [[0 3]
 [0 1]
 [0 2]
 [0 4]
 [0 0]]  Selected action  [0 4]
episode: 14   score: -254.0   memory length: 2119   epsilon: 0.9599820385906737
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [3 1]
 [0 3]
 [2 1]
 [2 3]
 [4 3]
 [3 4]
 [0 0]]  Selected action  [4 1]
episode: 14   score: -250.0   memory length: 2120   epsilon: 0.9599820385906737
Exploring Available actions  [[4 1]
 [1 3]
 [1 2]
 [4 0]
 [0 3]
 [3 1]
 [4 2]
 [2 1]
 [3 0]
 [4 3]
 [2 3]
 [2 0]
 [2 4]
 [0 0]

episode: 14   score: -348.0   memory length: 2167   epsilon: 0.9599820385906737
Exploring Available actions  [[4 3]
 [4 1]
 [0 4]
 [1 3]
 [3 2]
 [0 0]]  Selected action  [0 4]
episode: 14   score: -373.0   memory length: 2168   epsilon: 0.9599820385906737
Exploring Available actions  [[2 4]
 [0 1]
 [2 3]
 [4 1]
 [2 1]
 [3 4]
 [0 3]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [0 3]
episode: 14   score: -369.0   memory length: 2169   epsilon: 0.9599820385906737
Exploring Available actions  [[3 2]
 [0 3]
 [2 0]
 [2 4]
 [0 4]
 [0 2]
 [3 0]
 [0 0]]  Selected action  [3 2]
episode: 14   score: -337.0   memory length: 2170   epsilon: 0.9599820385906737
Exploring Available actions  [[4 1]
 [2 0]
 [3 2]
 [1 0]
 [0 4]
 [0 3]
 [1 2]
 [0 0]]  Selected action  [3 2]
episode: 14   score: -323.0   memory length: 2171   epsilon: 0.9599820385906737
Exploring Available actions  [[1 0]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 14   score: -325.0   memory length: 2172   epsilon: 0.9599820385906737
Explo

episode: 15   score: -47.0   memory length: 2217   epsilon: 0.9569708984351178
Exploring Available actions  [[3 1]
 [2 0]
 [4 2]
 [0 0]]  Selected action  [3 1]
episode: 15   score: -61.0   memory length: 2218   epsilon: 0.9569708984351178
Exploring Available actions  [[2 3]
 [3 4]
 [4 1]
 [2 0]
 [0 1]
 [2 4]
 [3 2]
 [2 1]
 [0 2]
 [0 0]]  Selected action  [2 3]
episode: 15   score: -42.0   memory length: 2219   epsilon: 0.9569708984351178
Exploring Available actions  [[4 0]
 [0 1]
 [3 4]
 [4 2]
 [3 0]
 [4 1]
 [2 0]
 [3 1]
 [2 1]
 [1 2]
 [0 0]]  Selected action  [4 2]
episode: 15   score: -68.0   memory length: 2220   epsilon: 0.9569708984351178
Exploring Available actions  [[1 0]
 [4 1]
 [2 3]
 [3 4]
 [2 0]
 [1 3]
 [0 0]]  Selected action  [2 3]
episode: 15   score: -56.0   memory length: 2221   epsilon: 0.9569708984351178
Exploring Available actions  [[3 4]
 [2 4]
 [0 2]
 [0 3]
 [3 2]
 [1 3]
 [4 3]
 [4 2]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 15   score: -59.0   memory lengt

episode: 15   score: -155.0   memory length: 2272   epsilon: 0.9569708984351178
Exploring Available actions  [[4 2]
 [0 3]
 [0 2]
 [1 4]
 [2 1]
 [2 3]
 [3 4]
 [0 0]]  Selected action  [2 3]
episode: 15   score: -136.0   memory length: 2273   epsilon: 0.9569708984351178
Exploring Available actions  [[4 2]
 [1 2]
 [3 2]
 [2 0]
 [0 0]]  Selected action  [4 2]
episode: 15   score: -162.0   memory length: 2274   epsilon: 0.9569708984351178
Exploring Available actions  [[0 2]
 [3 4]
 [3 0]
 [2 1]
 [0 4]
 [4 0]
 [0 1]
 [0 0]]  Selected action  [3 4]
episode: 15   score: -172.0   memory length: 2275   epsilon: 0.9569708984351178
Exploring Available actions  [[2 3]
 [0 3]
 [0 4]
 [3 0]
 [3 2]
 [3 1]
 [4 3]
 [0 2]
 [4 0]
 [2 1]
 [2 0]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 15   score: -178.0   memory length: 2276   epsilon: 0.9569708984351178
Exploring Available actions  [[2 4]
 [0 4]
 [3 2]
 [0 1]
 [0 2]
 [3 4]
 [3 1]
 [0 0]]  Selected action  [0 2]
episode: 15   score: -182.0   memory

episode: 15   score: -173.0   memory length: 2324   epsilon: 0.9569708984351178
Exploring Available actions  [[1 2]
 [0 4]
 [2 3]
 [0 0]]  Selected action  [0 0]
episode: 15   score: -178.0   memory length: 2325   epsilon: 0.9569708984351178
Exploring Available actions  [[3 1]
 [4 3]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 15   score: -173.0   memory length: 2326   epsilon: 0.9569708984351178
Exploring Available actions  [[2 3]
 [1 3]
 [4 3]
 [1 4]
 [0 0]]  Selected action  [1 3]
episode: 15   score: -189.0   memory length: 2327   epsilon: 0.9569708984351178
Exploring Available actions  [[1 2]
 [2 0]
 [0 2]
 [0 3]
 [4 1]
 [3 4]
 [3 2]
 [2 1]
 [3 1]
 [1 4]
 [1 0]
 [1 3]
 [0 0]]  Selected action  [0 3]
episode: 15   score: -191.0   memory length: 2328   epsilon: 0.9569708984351178
Exploring Available actions  [[3 0]
 [4 2]
 [2 0]
 [2 4]
 [0 4]
 [3 4]
 [4 1]
 [0 1]
 [2 3]
 [4 0]
 [0 2]
 [0 0]]  Selected action  [0 4]
episode: 15   score: -173.0   memory length: 2329   epsilon: 0.9

episode: 16   score: -11.0   memory length: 2374   epsilon: 0.9539692032115212
Exploring Available actions  [[4 1]
 [1 4]
 [3 1]
 [1 3]
 [2 3]
 [3 0]
 [4 2]
 [1 0]
 [2 1]
 [0 4]
 [0 2]
 [2 0]
 [0 0]]  Selected action  [0 0]
episode: 16   score: -16.0   memory length: 2375   epsilon: 0.9539692032115212
Exploring Available actions  [[0 4]
 [0 2]
 [3 0]
 [1 2]
 [1 0]
 [4 0]
 [2 1]
 [0 0]]  Selected action  [3 0]
episode: 16   score: -8.0   memory length: 2376   epsilon: 0.9539692032115212
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [3 1]
episode: 16   score: -14.0   memory length: 2377   epsilon: 0.9539692032115212
Exploring Available actions  [[3 2]
 [1 2]
 [2 3]
 [3 0]
 [4 3]
 [1 4]
 [0 4]
 [2 1]
 [1 0]
 [0 2]
 [3 1]
 [2 4]
 [3 4]
 [0 3]
 [0 0]]  Selected action  [3 0]
episode: 16   score: -11.0   memory length: 2378   epsilon: 0.9539692032115212
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [2 0]
episode: 16   score: -27.0   memory length: 2379   epsil

episode: 16   score: 38.0   memory length: 2421   epsilon: 0.9539692032115212
Exploring Available actions  [[2 4]
 [0 4]
 [0 3]
 [1 0]
 [3 0]
 [1 3]
 [3 1]
 [3 2]
 [0 0]]  Selected action  [0 3]
episode: 16   score: 37.0   memory length: 2422   epsilon: 0.9539692032115212
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [3 1]
 [0 2]
 [0 4]
 [0 3]
 [1 4]
 [4 0]
 [0 1]
 [2 4]
 [1 0]
 [2 0]
 [0 0]]  Selected action  [4 1]
episode: 16   score: 20.0   memory length: 2423   epsilon: 0.9539692032115212
Exploring Available actions  [[3 1]
 [2 0]
 [0 2]
 [3 4]
 [4 3]
 [4 0]
 [4 1]
 [0 4]
 [2 3]
 [4 2]
 [1 3]
 [2 4]
 [0 3]
 [3 0]
 [0 1]
 [0 0]]  Selected action  [0 2]
episode: 16   score: 21.0   memory length: 2424   epsilon: 0.9539692032115212
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [0 0]
episode: 16   score: 16.0   memory length: 2425   epsilon: 0.9539692032115212
Exploring Available actions  [[4 3]
 [4 0]
 [0 0]]  Selected 

(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [1 0]
 [2 1]
 [3 2]
 [0 1]
 [0 3]
 [0 0]]  Selected action  [1 3]
episode: 16   score: -37.0   memory length: 2471   epsilon: 0.9539692032115212
Episode : 17
Exploring Available actions  [[3 4]
 [2 4]
 [4 1]
 [4 2]
 [0 1]
 [4 3]
 [2 1]
 [1 2]
 [0 4]
 [4 0]
 [1 0]
 [3 1]
 [3 0]
 [0 3]
 [1 3]
 [0 0]]  Selected action  [0 3]
episode: 17   score: 7.0   memory length: 2472   epsilon: 0.9509769232943149
Exploring Available actions  [[2 0]
 [1 2]
 [3 4]
 [4 1]
 [4 2]
 [0 4]
 [3 0]
 [0 0]]  Selected action  [4 1]
episode: 17   score: -28.0   memory length: 2473   epsilon: 0.9509769232943149
Exploring Available actions  [[4 0]
 [4 3]
 [2 0]
 [2 1]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [4 0]
episode: 17   score: -24.0   memory length: 2474   epsilon: 0.9509769232943149
Exploring Available actions  [[0 3]
 [2 0]
 [0 0]]  Selected action  [0 0]
episode: 17   score: -29.0   memory length: 2475   epsilon: 0.950976923

episode: 17   score: -117.0   memory length: 2517   epsilon: 0.9509769232943149
Exploring Available actions  [[0 2]
 [4 0]
 [3 1]
 [3 0]
 [3 2]
 [4 3]
 [4 2]
 [2 1]
 [2 3]
 [0 4]
 [1 0]
 [0 0]]  Selected action  [0 4]
episode: 17   score: -148.0   memory length: 2518   epsilon: 0.9509769232943149
Exploring Available actions  [[3 4]
 [4 3]
 [1 0]
 [0 3]
 [2 4]
 [4 2]
 [4 0]
 [4 1]
 [1 4]
 [0 0]]  Selected action  [4 3]
episode: 17   score: -140.0   memory length: 2519   epsilon: 0.9509769232943149
Exploring Available actions  [[3 2]
 [1 0]
 [2 4]
 [0 2]
 [4 1]
 [4 3]
 [1 2]
 [2 0]
 [0 0]]  Selected action  [1 0]
episode: 17   score: -140.0   memory length: 2520   epsilon: 0.9509769232943149
Exploring Available actions  [[0 4]
 [1 0]
 [0 0]]  Selected action  [0 4]
episode: 17   score: -140.0   memory length: 2521   epsilon: 0.9509769232943149
Exploring Available actions  [[1 0]
 [3 1]
 [2 1]
 [0 1]
 [1 3]
 [0 2]
 [3 4]
 [0 3]
 [3 2]
 [4 3]
 [4 0]
 [0 0]]  Selected action  [3 1]
episode:

episode: 17   score: -112.0   memory length: 2564   epsilon: 0.9509769232943149
Exploring Available actions  [[0 2]
 [2 1]
 [2 4]
 [4 1]
 [0 1]
 [2 3]
 [4 0]
 [0 0]]  Selected action  [2 1]
episode: 17   score: -88.0   memory length: 2565   epsilon: 0.9509769232943149
Exploring Available actions  [[2 4]
 [1 3]
 [3 1]
 [1 0]
 [1 2]
 [0 1]
 [4 1]
 [3 2]
 [3 4]
 [3 0]
 [1 4]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 17   score: -93.0   memory length: 2566   epsilon: 0.9509769232943149
Exploring Available actions  [[0 2]
 [1 4]
 [0 1]
 [4 0]
 [1 0]
 [1 2]
 [2 1]
 [3 4]
 [2 4]
 [4 3]
 [0 4]
 [0 3]
 [4 2]
 [0 0]]  Selected action  [0 1]
episode: 17   score: -100.0   memory length: 2567   epsilon: 0.9509769232943149
Exploring Available actions  [[3 4]
 [0 2]
 [4 1]
 [3 2]
 [1 0]
 [1 3]
 [4 2]
 [0 4]
 [1 2]
 [3 1]
 [4 3]
 [0 3]
 [1 4]
 [2 3]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 17   score: -105.0   memory length: 2568   epsilon: 0.9509769232943149
Exploring Available actions  

episode: 18   score: -34.0   memory length: 2613   epsilon: 0.9479940291508555
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 4]
 [4 3]
 [2 1]
 [4 1]
 [3 2]
 [3 1]
 [1 2]
 [2 0]
 [1 4]
 [1 0]
 [0 0]]  Selected action  [3 4]
episode: 18   score: -18.0   memory length: 2614   epsilon: 0.9479940291508555
Exploring Available actions  [[3 1]
 [1 2]
 [1 0]
 [2 3]
 [2 0]
 [0 3]
 [4 3]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 18   score: -23.0   memory length: 2615   epsilon: 0.9479940291508555
Exploring Available actions  [[1 0]
 [2 4]
 [0 1]
 [1 3]
 [0 0]]  Selected action  [0 1]
episode: 18   score: -7.0   memory length: 2616   epsilon: 0.9479940291508555
Exploring Available actions  [[4 0]
 [1 3]
 [3 4]
 [4 1]
 [2 4]
 [0 3]
 [0 2]
 [1 2]
 [1 4]
 [4 2]
 [1 0]
 [2 1]
 [0 0]]  Selected action  [1 4]
episode: 18   score: 1.0   memory length: 2617   epsilon: 0.9479940291508555
Exploring Available actions  [[4 0]
 [0 1]
 [2 0]
 [3 0]
 [1 0]
 [4 2

episode: 18   score: -12.0   memory length: 2662   epsilon: 0.9479940291508555
Exploring Available actions  [[1 0]
 [0 4]
 [3 0]
 [0 2]
 [1 4]
 [2 3]
 [2 1]
 [0 1]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 18   score: 6.0   memory length: 2663   epsilon: 0.9479940291508555
Exploring Available actions  [[3 1]
 [3 4]
 [0 3]
 [0 1]
 [0 2]
 [2 1]
 [3 0]
 [0 0]]  Selected action  [2 1]
episode: 18   score: 24.0   memory length: 2664   epsilon: 0.9479940291508555
Exploring Available actions  [[0 2]
 [0 1]
 [1 0]
 [0 3]
 [3 1]
 [2 0]
 [1 3]
 [2 1]
 [2 4]
 [3 2]
 [4 1]
 [2 3]
 [3 0]
 [0 0]]  Selected action  [0 1]
episode: 18   score: 27.0   memory length: 2665   epsilon: 0.9479940291508555
Exploring Available actions  [[4 1]
 [3 1]
 [2 1]
 [0 1]
 [0 4]
 [1 3]
 [4 2]
 [0 2]
 [2 0]
 [3 4]
 [2 3]
 [4 3]
 [1 0]
 [0 0]]  Selected action  [0 2]
episode: 18   score: 27.0   memory length: 2666   epsilon: 0.9479940291508555
Exploring Available actions  [[1 2]
 [0 1]
 [3 4]
 [0 3]
 [3 2]
 [0 0]] 

episode: 18   score: 71.0   memory length: 2711   epsilon: 0.9479940291508555
Exploring Available actions  [[0 4]
 [0 0]]  Selected action  [0 4]
episode: 18   score: 83.0   memory length: 2712   epsilon: 0.9479940291508555
Exploring Available actions  [[3 1]
 [0 2]
 [1 0]
 [0 4]
 [1 3]
 [0 0]]  Selected action  [0 4]
episode: 18   score: 72.0   memory length: 2713   epsilon: 0.9479940291508555
Exploring Available actions  [[1 3]
 [4 0]
 [0 4]
 [0 1]
 [1 2]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 18   score: 67.0   memory length: 2714   epsilon: 0.9479940291508555
Exploring Available actions  [[0 4]
 [0 3]
 [4 3]
 [1 4]
 [2 1]
 [0 0]]  Selected action  [1 4]
episode: 18   score: 65.0   memory length: 2715   epsilon: 0.9479940291508555
Exploring Available actions  [[2 0]
 [3 0]
 [4 2]
 [4 0]
 [2 3]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 18   score: 58.0   memory length: 2716   epsilon: 0.9479940291508555
Exploring Available actions  [[0 4]
 [4 0]
 [2 0]
 [0 2]
 [2 3]
 [

episode: 19   score: 21.0   memory length: 2763   epsilon: 0.9450204913411338
Exploring Available actions  [[3 4]
 [1 4]
 [4 2]
 [4 3]
 [0 2]
 [0 4]
 [3 1]
 [0 0]]  Selected action  [3 4]
episode: 19   score: 37.0   memory length: 2764   epsilon: 0.9450204913411338
Exploring Available actions  [[1 0]
 [2 4]
 [0 4]
 [3 0]
 [3 1]
 [4 1]
 [1 3]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 19   score: 32.0   memory length: 2765   epsilon: 0.9450204913411338
Exploring Available actions  [[0 2]
 [3 1]
 [1 3]
 [2 1]
 [4 3]
 [1 2]
 [2 3]
 [4 1]
 [0 1]
 [0 0]]  Selected action  [3 1]
episode: 19   score: 28.0   memory length: 2766   epsilon: 0.9450204913411338
Exploring Available actions  [[0 3]
 [2 4]
 [4 1]
 [2 0]
 [0 4]
 [2 3]
 [4 2]
 [4 3]
 [3 0]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 19   score: 23.0   memory length: 2767   epsilon: 0.9450204913411338
Exploring Available actions  [[1 3]
 [1 2]
 [0 2]
 [1 4]
 [4 3]
 [2 1]
 [4 1]
 [2 4]
 [4 2]
 [4 0]
 [0 3]
 [3 4]
 [2 3]
 [0 0]] 

episode: 19   score: -29.0   memory length: 2814   epsilon: 0.9450204913411338
Exploring Available actions  [[2 4]
 [2 1]
 [3 1]
 [1 0]
 [0 2]
 [2 3]
 [0 0]]  Selected action  [0 0]
episode: 19   score: -34.0   memory length: 2815   epsilon: 0.9450204913411338
Exploring Available actions  [[4 2]
 [3 2]
 [1 3]
 [2 0]
 [3 0]
 [4 0]
 [0 2]
 [1 2]
 [2 3]
 [0 4]
 [0 0]]  Selected action  [0 0]
episode: 19   score: -39.0   memory length: 2816   epsilon: 0.9450204913411338
Exploring Available actions  [[0 1]
 [2 0]
 [4 2]
 [0 2]
 [0 3]
 [2 4]
 [0 0]]  Selected action  [2 0]
episode: 19   score: -15.0   memory length: 2817   epsilon: 0.9450204913411338
Exploring Available actions  [[2 1]
 [0 0]]  Selected action  [2 1]
episode: 19   score: -33.0   memory length: 2818   epsilon: 0.9450204913411338
Exploring Available actions  [[3 0]
 [2 3]
 [0 3]
 [0 2]
 [1 4]
 [2 0]
 [3 2]
 [4 0]
 [4 2]
 [0 4]
 [3 4]
 [4 3]
 [1 2]
 [2 1]
 [0 1]
 [0 0]]  Selected action  [1 4]
episode: 19   score: -33.0   memor

episode: 19   score: -71.0   memory length: 2867   epsilon: 0.9450204913411338
Exploring Available actions  [[3 2]
 [0 4]
 [4 3]
 [1 2]
 [0 0]]  Selected action  [4 3]
episode: 19   score: -61.0   memory length: 2868   epsilon: 0.9450204913411338
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [3 4]
 [4 3]
 [3 1]
 [0 3]
 [4 1]
 [2 3]
 [0 0]]  Selected action  [0 4]
episode: 19   score: -54.0   memory length: 2869   epsilon: 0.9450204913411338
Exploring Available actions  [[4 0]
 [1 3]
 [2 3]
 [1 2]
 [1 4]
 [0 4]
 [4 2]
 [2 4]
 [3 1]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 19   score: -59.0   memory length: 2870   epsilon: 0.9450204913411338
Exploring Available actions  [[3 1]
 [2 1]
 [4 0]
 [3 0]
 [1 2]
 [2 4]
 [1 0]
 [0 0]]  Selected action  [1 2]
episode: 19   score: -28.0   memory length: 2871   epsilon: 0.9450204913411338
Exploring Available actions  [[4 3]
 [2 3]
 [0 4]
 [4 1]
 [0 0]]  Selected action  [2 3]
episode: 19   score: -20.0  

episode: 20   score: -50.0   memory length: 2918   epsilon: 0.9420562805174835
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 0]
episode: 20   score: -55.0   memory length: 2919   epsilon: 0.9420562805174835
Exploring Available actions  [[3 1]
 [4 1]
 [0 0]]  Selected action  [3 1]
episode: 20   score: -69.0   memory length: 2920   epsilon: 0.9420562805174835
Exploring Available actions  [[3 2]
 [3 1]
 [4 2]
 [4 1]
 [2 1]
 [2 3]
 [3 4]
 [3 0]
 [1 4]
 [0 1]
 [1 2]
 [4 3]
 [0 2]
 [0 0]]  Selected action  [2 3]
episode: 20   score: -47.0   memory length: 2921   epsilon: 0.9420562805174835
Exploring Available actions  [[2 0]
 [4 1]
 [1 4]
 [4 0]
 [0 1]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 20   score: -52.0   memory length: 2922   epsilon: 0.9420562805174835
Exploring Available actions  [[2 0]
 [1 4]
 [3 0]
 [4 0]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 20   score: -54.0   memory length: 2923   epsilon: 0.9420562805174835
Exploring Available actions  [[3

episode: 20   score: -222.0   memory length: 2972   epsilon: 0.9420562805174835
Exploring Available actions  [[3 1]
 [0 3]
 [1 0]
 [0 4]
 [2 3]
 [3 2]
 [2 1]
 [0 1]
 [0 0]]  Selected action  [2 1]
episode: 20   score: -223.0   memory length: 2973   epsilon: 0.9420562805174835
Exploring Available actions  [[2 3]
 [3 2]
 [4 2]
 [0 1]
 [1 2]
 [1 0]
 [0 4]
 [4 3]
 [2 4]
 [0 3]
 [2 0]
 [0 0]]  Selected action  [2 4]
episode: 20   score: -233.0   memory length: 2974   epsilon: 0.9420562805174835
Exploring Available actions  [[0 3]
 [0 2]
 [3 2]
 [2 1]
 [0 1]
 [2 0]
 [0 0]]  Selected action  [0 1]
episode: 20   score: -224.0   memory length: 2975   epsilon: 0.9420562805174835
Exploring Available actions  [[3 2]
 [1 0]
 [1 3]
 [2 4]
 [4 2]
 [2 0]
 [3 4]
 [4 0]
 [0 2]
 [1 4]
 [0 3]
 [0 0]]  Selected action  [2 4]
episode: 20   score: -226.0   memory length: 2976   epsilon: 0.9420562805174835
Exploring Available actions  [[2 1]
 [2 3]
 [4 3]
 [1 2]
 [1 3]
 [0 4]
 [3 4]
 [0 0]]  Selected action  

episode: 20   score: -294.0   memory length: 3025   epsilon: 0.9420562805174835
Exploring Available actions  [[4 2]
 [0 3]
 [2 0]
 [4 1]
 [0 4]
 [1 2]
 [4 0]
 [1 3]
 [3 4]
 [3 2]
 [0 1]
 [0 0]]  Selected action  [3 4]
episode: 20   score: -312.0   memory length: 3026   epsilon: 0.9420562805174835
Exploring Available actions  [[0 1]
 [0 2]
 [3 4]
 [4 0]
 [3 2]
 [1 3]
 [4 2]
 [1 4]
 [1 2]
 [2 0]
 [4 1]
 [0 4]
 [4 3]
 [0 0]]  Selected action  [0 1]
episode: 20   score: -289.0   memory length: 3027   epsilon: 0.9420562805174835
Exploring Available actions  [[4 0]
 [1 4]
 [1 3]
 [3 0]
 [1 2]
 [0 1]
 [2 4]
 [1 0]
 [3 2]
 [4 2]
 [0 2]
 [3 1]
 [2 3]
 [4 1]
 [3 4]
 [0 0]]  Selected action  [3 1]
episode: 20   score: -300.0   memory length: 3028   epsilon: 0.9420562805174835
Exploring Available actions  [[3 0]
 [0 1]
 [2 4]
 [0 3]
 [2 0]
 [3 2]
 [2 3]
 [1 3]
 [4 1]
 [2 1]
 [4 3]
 [1 0]
 [4 2]
 [3 1]
 [3 4]
 [0 0]]  Selected action  [3 0]
episode: 20   score: -293.0   memory length: 3029   epsilo

episode: 21   score: -235.0   memory length: 3078   epsilon: 0.9391013674242926
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [1 2]
 [4 0]
 [1 3]
 [0 2]
 [0 4]
 [2 1]
 [0 0]]  Selected action  [1 0]
episode: 21   score: -250.0   memory length: 3079   epsilon: 0.9391013674242926
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 21   score: -222.0   memory length: 3080   epsilon: 0.9391013674242926
Exploring Available actions  [[0 1]
 [4 0]
 [2 4]
 [4 2]
 [4 1]
 [1 3]
 [3 1]
 [2 0]
 [4 3]
 [3 2]
 [1 4]
 [0 0]]  Selected action  [4 0]
episode: 21   score: -222.0   memory length: 3081   epsilon: 0.9391013674242926
Exploring Available actions  [[4 0]
 [0 4]
 [4 2]
 [0 0]]  Selected action  [4 0]
episode: 21   score: -222.0   memory length: 3082   epsilon: 0.9391013674242926
Exploring Available actions  [[2 4]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 21   score: -243.0   memory length: 3083   epsilon: 0.9391013674242926

episode: 21   score: -282.0   memory length: 3132   epsilon: 0.9391013674242926
Exploring Available actions  [[2 1]
 [0 0]]  Selected action  [0 0]
episode: 21   score: -287.0   memory length: 3133   epsilon: 0.9391013674242926
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [2 0]
episode: 21   score: -288.0   memory length: 3134   epsilon: 0.9391013674242926
Exploring Available actions  [[0 4]
 [0 0]]  Selected action  [0 4]
episode: 21   score: -284.0   memory length: 3135   epsilon: 0.9391013674242926
Exploring Available actions  [[0 2]
 [0 4]
 [3 2]
 [2 4]
 [4 1]
 [1 4]
 [3 4]
 [1 3]
 [1 0]
 [2 0]
 [0 0]]  Selected action  [0 2]
episode: 21   score: -307.0   memory length: 3136   epsilon: 0.9391013674242926
Exploring Available actions  [[0 3]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 21   score: -320.0   memory length: 3137   epsilon: 0.9391013674242926
Exploring Available actions  [[2 4]
 [4 1]
 [0 3]
 [0 4]
 [3 1]
 [3 4]
 [2 1]
 [2 0]
 [1 4]
 [0 0]]  Selected 

episode: 22   score: -23.0   memory length: 3185   epsilon: 0.9361557228977139
Exploring Available actions  [[3 4]
 [4 0]
 [4 1]
 [0 4]
 [1 4]
 [2 0]
 [3 1]
 [4 2]
 [0 2]
 [1 2]
 [3 2]
 [0 3]
 [3 0]
 [1 3]
 [2 3]
 [0 0]]  Selected action  [3 4]
episode: 22   score: -19.0   memory length: 3186   epsilon: 0.9361557228977139
Exploring Available actions  [[1 3]
 [3 4]
 [0 4]
 [1 2]
 [2 1]
 [3 0]
 [2 0]
 [3 2]
 [4 2]
 [3 1]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 22   score: -21.0   memory length: 3187   epsilon: 0.9361557228977139
Exploring Available actions  [[1 3]
 [2 4]
 [4 3]
 [2 0]
 [3 4]
 [0 1]
 [4 2]
 [0 2]
 [3 0]
 [3 2]
 [2 3]
 [1 2]
 [0 0]]  Selected action  [3 0]
episode: 22   score: -7.0   memory length: 3188   epsilon: 0.9361557228977139
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [1 0]
 [4 3]
 [1 2]
 [0 0]]  Selected action  [4 2]
episode: 22   score: -22.0   memory length: 3189   epsilon: 0.9361557228977139
Exploring Available actions  

episode: 22   score: 0.0   memory length: 3238   epsilon: 0.9361557228977139
Exploring Available actions  [[4 0]
 [1 0]
 [2 3]
 [1 2]
 [2 4]
 [2 0]
 [4 3]
 [0 4]
 [0 2]
 [4 2]
 [1 4]
 [4 1]
 [0 0]]  Selected action  [1 2]
episode: 22   score: 6.0   memory length: 3239   epsilon: 0.9361557228977139
Exploring Available actions  [[1 4]
 [3 2]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 22   score: 1.0   memory length: 3240   epsilon: 0.9361557228977139
Exploring Available actions  [[2 4]
 [1 4]
 [2 0]
 [0 1]
 [0 0]]  Selected action  [2 0]
episode: 22   score: -5.0   memory length: 3241   epsilon: 0.9361557228977139
Exploring Available actions  [[0 2]
 [2 4]
 [4 2]
 [0 0]]  Selected action  [2 4]
episode: 22   score: -37.0   memory length: 3242   epsilon: 0.9361557228977139
Exploring Available actions  [[2 0]
 [3 1]
 [1 4]
 [1 3]
 [4 2]
 [1 0]
 [0 3]
 [3 0]
 [4 1]
 [0 0]]  Selected action  [1 4]
episode: 22   score: -23.0   memory length: 3243   epsilon: 0.9361557228977139
Exploring A

episode: 22   score: -233.0   memory length: 3293   epsilon: 0.9361557228977139
Exploring Available actions  [[3 2]
 [2 0]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 22   score: -236.0   memory length: 3294   epsilon: 0.9361557228977139
Exploring Available actions  [[1 0]
 [3 1]
 [3 0]
 [2 1]
 [3 2]
 [1 3]
 [0 0]]  Selected action  [3 0]
episode: 22   score: -206.0   memory length: 3295   epsilon: 0.9361557228977139
Exploring Available actions  [[2 4]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 22   score: -222.0   memory length: 3296   epsilon: 0.9361557228977139
Exploring Available actions  [[4 3]
 [2 3]
 [3 0]
 [4 0]
 [4 1]
 [1 3]
 [2 0]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 22   score: -202.0   memory length: 3297   epsilon: 0.9361557228977139
Exploring Available actions  [[2 3]
 [4 0]
 [4 1]
 [1 0]
 [0 2]
 [1 2]
 [3 2]
 [2 1]
 [0 4]
 [4 2]
 [3 0]
 [0 0]]  Selected action  [0 4]
episode: 22   score: -228.0   memory length: 3298   epsilon: 0.9361557228977139
Explo

episode: 23   score: 27.0   memory length: 3349   epsilon: 0.933219317865377
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [0 0]
episode: 23   score: 22.0   memory length: 3350   epsilon: 0.933219317865377
Exploring Available actions  [[0 4]
 [0 0]]  Selected action  [0 4]
episode: 23   score: 26.0   memory length: 3351   epsilon: 0.933219317865377
Exploring Available actions  [[0 1]
 [2 3]
 [0 2]
 [3 2]
 [1 4]
 [1 0]
 [2 0]
 [0 3]
 [1 3]
 [0 0]]  Selected action  [2 0]
episode: 23   score: 13.0   memory length: 3352   epsilon: 0.933219317865377
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [4 0]
 [4 1]
 [0 1]
 [0 0]]  Selected action  [4 2]
episode: 23   score: 12.0   memory length: 3353   epsilon: 0.933219317865377
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 0]
episode: 23   score: 7.0   memory length: 3354   epsilon: 0.933219317865377
Exploring Available actions  [[4 0]
 [0 0]]  Selected action  [0 0]
episode: 23   s

episode: 23   score: -38.0   memory length: 3405   epsilon: 0.933219317865377
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 23   score: -43.0   memory length: 3406   epsilon: 0.933219317865377
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [1 2]
 [0 0]]  Selected action  [4 3]
episode: 23   score: -50.0   memory length: 3407   epsilon: 0.933219317865377
Exploring Available actions  [[4 2]
 [1 3]
 [0 2]
 [3 0]
 [1 2]
 [0 1]
 [1 4]
 [2 0]
 [0 4]
 [3 2]
 [0 3]
 [2 1]
 [2 3]
 [1 0]
 [0 0]]  Selected action  [1 3]
episode: 23   score: -54.0   memory length: 3408   epsilon: 0.933219317865377
Exploring Available actions  [[1 2]
 [1 3]
 [0 3]
 [3 4]
 [0 4]
 [4 0]
 [2 1]
 [0 1]
 [3 0]
 [0 0]]  Selected action  [2 1]
episode: 23   score: -70.0   memory length: 3409   epsilon: 0.933219317865377
Exploring Available actions  [[3 0]
 [3 1]
 [4 1]
 [3 2]
 [4 3]
 [2 3]
 [3 4]
 [4 2]
 [1 4]
 [2 0]
 [1 0]
 [0 3]
 [2 1]
 [4 0]
 [0 4]
 [0 0]]  Selected

episode: 24   score: 8.0   memory length: 3461   epsilon: 0.9302921233461025
Exploring Available actions  [[4 0]
 [0 2]
 [4 1]
 [3 0]
 [2 4]
 [0 1]
 [3 2]
 [2 3]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [2 4]
episode: 24   score: -32.0   memory length: 3462   epsilon: 0.9302921233461025
Exploring Available actions  [[0 4]
 [1 3]
 [0 1]
 [0 0]]  Selected action  [0 4]
episode: 24   score: -32.0   memory length: 3463   epsilon: 0.9302921233461025
Exploring Available actions  [[3 4]
 [4 2]
 [1 4]
 [0 2]
 [1 0]
 [3 2]
 [3 0]
 [0 0]]  Selected action  [4 2]
episode: 24   score: -32.0   memory length: 3464   epsilon: 0.9302921233461025
Exploring Available actions  [[2 0]
 [0 4]
 [2 1]
 [1 3]
 [3 2]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 24   score: -37.0   memory length: 3465   epsilon: 0.9302921233461025
Exploring Available actions  [[3 0]
 [4 2]
 [0 0]]  Selected action  [4 2]
episode: 24   score: -37.0   memory length: 3466   epsilon: 0.9302921233461025
Exploring Available actions

episode: 24   score: -125.0   memory length: 3513   epsilon: 0.9302921233461025
Exploring Available actions  [[0 2]
 [1 0]
 [2 4]
 [2 1]
 [2 3]
 [0 0]]  Selected action  [0 0]
episode: 24   score: -130.0   memory length: 3514   epsilon: 0.9302921233461025
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 2]
 [3 4]
 [2 3]
 [0 0]]  Selected action  [1 2]
episode: 24   score: -135.0   memory length: 3515   epsilon: 0.9302921233461025
Exploring Available actions  [[0 4]
 [3 1]
 [4 3]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 24   score: -140.0   memory length: 3516   epsilon: 0.9302921233461025
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [3 0]
 [1 3]
 [3 1]
 [0 0]]  Selected action  [4 3]
episode: 24   score: -136.0   memory length: 3517   epsilon: 0.9302921233461025
Exploring Available actions  [[3 4]
 [4 0]
 [2 4]
 [1 2]
 [0 3]
 [3 0]
 [0 1]
 [1 0]
 [2 3]
 [1 4]
 [1 3]
 [0 0]]  Selected action  [2 4]
episode: 24   score: -14

episode: 24   score: -327.0   memory length: 3567   epsilon: 0.9302921233461025
Exploring Available actions  [[2 3]
 [1 4]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 24   score: -332.0   memory length: 3568   epsilon: 0.9302921233461025
Exploring Available actions  [[0 3]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 24   score: -313.0   memory length: 3569   epsilon: 0.9302921233461025
Exploring Available actions  [[2 3]
 [3 0]
 [3 4]
 [4 2]
 [4 0]
 [3 2]
 [4 3]
 [0 0]]  Selected action  [3 2]
episode: 24   score: -297.0   memory length: 3570   epsilon: 0.9302921233461025
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 2]
 [3 0]
 [0 4]
 [0 0]]  Selected action  [1 2]
episode: 24   score: -336.0   memory length: 3571   epsilon: 0.9302921233461025
Exploring Available actions  [[0 2]
 [2 1]
 [3 4]
 [2 3]
 [3 2]
 [0 0]]  Selected action  [2 3]
episode: 24   score: -320.0   memory length: 3572   epsilon: 0.9302921233461025
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.

episode: 25   score: 49.0   memory length: 3623   epsilon: 0.9273741104496143
Exploring Available actions  [[2 3]
 [4 3]
 [0 0]]  Selected action  [2 3]
episode: 25   score: 51.0   memory length: 3624   epsilon: 0.9273741104496143
Exploring Available actions  [[4 1]
 [2 3]
 [0 2]
 [4 3]
 [0 4]
 [2 1]
 [0 1]
 [1 4]
 [0 0]]  Selected action  [0 1]
episode: 25   score: 59.0   memory length: 3625   epsilon: 0.9273741104496143
Exploring Available actions  [[1 4]
 [0 2]
 [2 0]
 [3 2]
 [3 4]
 [4 0]
 [4 1]
 [1 0]
 [2 3]
 [1 3]
 [3 0]
 [1 2]
 [2 4]
 [0 0]]  Selected action  [0 2]
episode: 25   score: 73.0   memory length: 3626   epsilon: 0.9273741104496143
Exploring Available actions  [[0 1]
 [4 3]
 [4 1]
 [0 4]
 [3 2]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 25   score: 28.0   memory length: 3627   epsilon: 0.9273741104496143
Exploring Available actions  [[4 3]
 [1 4]
 [0 1]
 [4 2]
 [0 4]
 [2 1]
 [1 2]
 [4 0]
 [0 3]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 25   score: 27.0   memo

episode: 25   score: -25.0   memory length: 3679   epsilon: 0.9273741104496143
Exploring Available actions  [[3 4]
 [2 3]
 [1 4]
 [0 0]]  Selected action  [3 4]
episode: 25   score: -39.0   memory length: 3680   epsilon: 0.9273741104496143
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [4 2]
 [1 0]
 [0 4]
 [4 0]
 [0 0]]  Selected action  [3 2]
episode: 25   score: -40.0   memory length: 3681   epsilon: 0.9273741104496143
Exploring Available actions  [[1 2]
 [3 2]
 [1 4]
 [2 4]
 [3 1]
 [3 0]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 25   score: -85.0   memory length: 3682   epsilon: 0.9273741104496143
Exploring Available actions  [[3 2]
 [0 2]
 [0 1]
 [4 2]
 [0 0]]  Selected action  [3 2]
episode: 25   score: -99.0   memory length: 3683   epsilon: 0.9273741104496143
Exploring Available actions  [[4 3]
 [4 0]
 [4 1]
 [3 0]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 25   score: -104.0   memory length: 3684   epsilon: 0.9273741104496143
Exploring 

episode: 25   score: -112.0   memory length: 3733   epsilon: 0.9273741104496143
Exploring Available actions  [[4 3]
 [2 3]
 [0 1]
 [3 1]
 [3 2]
 [1 3]
 [4 2]
 [2 0]
 [1 0]
 [1 2]
 [4 0]
 [3 4]
 [2 1]
 [0 0]]  Selected action  [2 1]
episode: 25   score: -104.0   memory length: 3734   epsilon: 0.9273741104496143
Exploring Available actions  [[4 3]
 [1 4]
 [0 1]
 [1 3]
 [4 2]
 [2 0]
 [4 1]
 [0 2]
 [0 4]
 [1 2]
 [3 0]
 [3 2]
 [3 1]
 [0 0]]  Selected action  [3 2]
episode: 25   score: -96.0   memory length: 3735   epsilon: 0.9273741104496143
Exploring Available actions  [[1 0]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 25   score: -94.0   memory length: 3736   epsilon: 0.9273741104496143
Exploring Available actions  [[1 0]
 [4 1]
 [0 4]
 [3 0]
 [1 3]
 [0 3]
 [0 0]]  Selected action  [4 1]
episode: 25   score: -88.0   memory length: 3737   epsilon: 0.9273741104496143
Exploring Available actions  [[2 0]
 [3 2]
 [4 2]
 [3 1]
 [0 3]
 [3 0]
 [1 4]
 [1 0]
 [2 3]
 [4 3]
 [0 4]
 [1 2]
 [4 0]
 

episode: 26   score: 7.0   memory length: 3785   epsilon: 0.9244652503762558
Exploring Available actions  [[1 3]
 [3 2]
 [0 1]
 [4 1]
 [0 0]]  Selected action  [1 3]
episode: 26   score: 16.0   memory length: 3786   epsilon: 0.9244652503762558
Exploring Available actions  [[3 0]
 [2 0]
 [1 4]
 [2 1]
 [0 2]
 [0 4]
 [4 3]
 [2 3]
 [0 0]]  Selected action  [2 0]
episode: 26   score: 3.0   memory length: 3787   epsilon: 0.9244652503762558
Exploring Available actions  [[4 3]
 [2 4]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 26   score: -4.0   memory length: 3788   epsilon: 0.9244652503762558
Exploring Available actions  [[2 1]
 [4 3]
 [2 4]
 [1 4]
 [0 3]
 [0 1]
 [3 2]
 [4 1]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 26   score: -2.0   memory length: 3789   epsilon: 0.9244652503762558
Exploring Available actions  [[2 1]
 [4 3]
 [0 2]
 [0 0]]  Selected action  [2 1]
episode: 26   score: -11.0   memory length: 3790   epsilon: 0.9244652503762558
Exploring Available actions  [[4 2]
 [3

episode: 26   score: -81.0   memory length: 3840   epsilon: 0.9244652503762558
Exploring Available actions  [[0 1]
 [2 1]
 [2 4]
 [0 4]
 [3 1]
 [4 3]
 [3 2]
 [4 1]
 [1 3]
 [3 0]
 [1 0]
 [3 4]
 [0 3]
 [0 2]
 [0 0]]  Selected action  [4 3]
episode: 26   score: -68.0   memory length: 3841   epsilon: 0.9244652503762558
Exploring Available actions  [[2 1]
 [1 0]
 [4 0]
 [1 2]
 [0 1]
 [3 2]
 [3 0]
 [0 0]]  Selected action  [3 2]
episode: 26   score: -56.0   memory length: 3842   epsilon: 0.9244652503762558
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [2 0]
 [4 2]
 [2 4]
 [0 0]]  Selected action  [0 3]
episode: 26   score: -62.0   memory length: 3843   epsilon: 0.9244652503762558
Exploring Available actions  [[1 4]
 [0 1]
 [2 4]
 [1 2]
 [3 1]
 [0 0]]  Selected action  [0 0]
episode: 26   score: -67.0   memory length: 3844   epsilon: 0.9244652503762558
Exploring Available actions  [[0 2]
 [4 1]
 [0 1]
 [1 2]
 [3 0]
 [4 2]
 [2 1]
 [0 4]
 [0 0]]  Selected action  [

episode: 27   score: 73.0   memory length: 3895   epsilon: 0.9215655144167055
Exploring Available actions  [[4 1]
 [2 4]
 [0 1]
 [4 3]
 [2 0]
 [1 0]
 [1 4]
 [3 0]
 [2 1]
 [4 2]
 [0 4]
 [3 1]
 [1 3]
 [2 3]
 [4 0]
 [0 0]]  Selected action  [0 1]
episode: 27   score: 70.0   memory length: 3896   epsilon: 0.9215655144167055
Exploring Available actions  [[2 1]
 [0 2]
 [0 3]
 [4 0]
 [1 4]
 [0 4]
 [0 0]]  Selected action  [0 3]
episode: 27   score: 51.0   memory length: 3897   epsilon: 0.9215655144167055
Exploring Available actions  [[1 3]
 [4 3]
 [3 0]
 [1 4]
 [0 0]]  Selected action  [4 3]
episode: 27   score: 47.0   memory length: 3898   epsilon: 0.9215655144167055
Exploring Available actions  [[3 1]
 [1 0]
 [0 4]
 [3 0]
 [2 3]
 [4 3]
 [3 2]
 [0 2]
 [4 0]
 [1 4]
 [2 0]
 [0 3]
 [4 2]
 [0 0]]  Selected action  [3 2]
episode: 27   score: 79.0   memory length: 3899   epsilon: 0.9215655144167055
Exploring Available actions  [[3 0]
 [2 0]
 [4 0]
 [0 4]
 [0 1]
 [1 2]
 [4 3]
 [2 3]
 [3 1]
 [2 1]
 

episode: 27   score: 141.0   memory length: 3949   epsilon: 0.9215655144167055
Exploring Available actions  [[2 3]
 [3 0]
 [4 2]
 [0 0]]  Selected action  [3 0]
episode: 27   score: 131.0   memory length: 3950   epsilon: 0.9215655144167055
Exploring Available actions  [[2 3]
 [3 0]
 [1 3]
 [0 0]]  Selected action  [2 3]
episode: 27   score: 121.0   memory length: 3951   epsilon: 0.9215655144167055
Exploring Available actions  [[2 3]
 [1 4]
 [3 4]
 [3 1]
 [4 1]
 [3 0]
 [0 3]
 [4 2]
 [0 0]]  Selected action  [3 4]
episode: 27   score: 145.0   memory length: 3952   epsilon: 0.9215655144167055
Exploring Available actions  [[1 3]
 [1 0]
 [2 0]
 [3 1]
 [1 2]
 [4 3]
 [3 4]
 [0 0]]  Selected action  [3 1]
episode: 27   score: 150.0   memory length: 3953   epsilon: 0.9215655144167055
Exploring Available actions  [[4 2]
 [0 2]
 [4 1]
 [1 4]
 [2 3]
 [1 3]
 [2 1]
 [0 4]
 [2 0]
 [3 0]
 [0 3]
 [0 0]]  Selected action  [2 1]
episode: 27   score: 119.0   memory length: 3954   epsilon: 0.92156551441670

episode: 27   score: 61.0   memory length: 4005   epsilon: 0.9215655144167055
Exploring Available actions  [[0 2]
 [2 4]
 [1 0]
 [0 4]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 27   score: 56.0   memory length: 4006   epsilon: 0.9215655144167055
Exploring Available actions  [[1 2]
 [4 3]
 [3 0]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 27   score: 69.0   memory length: 4007   epsilon: 0.9215655144167055
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 27   score: 81.0   memory length: 4008   epsilon: 0.9215655144167055
Exploring Available actions  [[1 0]
 [2 0]
 [2 1]
 [0 2]
 [4 2]
 [4 0]
 [0 0]]  Selected action  [0 2]
episode: 27   score: 86.0   memory length: 4009   epsilon: 0.9215655144167055
Exploring Available actions  [[3 4]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 27   score: 66.0   memory length: 4010   epsilon: 0.9215655144167055
Exploring Available actions  [[1 2]
 [2 1]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 27 

episode: 28   score: -66.0   memory length: 4059   epsilon: 0.9186748739516929
Exploring Available actions  [[0 2]
 [1 0]
 [4 0]
 [2 3]
 [1 2]
 [0 1]
 [2 4]
 [4 1]
 [3 4]
 [0 3]
 [2 0]
 [3 1]
 [1 3]
 [1 4]
 [2 1]
 [0 0]]  Selected action  [0 1]
episode: 28   score: -69.0   memory length: 4060   epsilon: 0.9186748739516929
Exploring Available actions  [[0 2]
 [3 4]
 [1 2]
 [1 4]
 [4 2]
 [3 2]
 [4 3]
 [0 1]
 [2 4]
 [0 4]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [3 2]
episode: 28   score: -59.0   memory length: 4061   epsilon: 0.9186748739516929
Exploring Available actions  [[0 2]
 [1 4]
 [2 0]
 [4 2]
 [1 3]
 [0 0]]  Selected action  [1 4]
episode: 28   score: -74.0   memory length: 4062   epsilon: 0.9186748739516929
Exploring Available actions  [[2 1]
 [1 0]
 [4 2]
 [1 2]
 [4 0]
 [3 0]
 [2 3]
 [0 1]
 [2 4]
 [2 0]
 [1 3]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 28   score: -79.0   memory length: 4063   epsilon: 0.9186748739516929
Exploring Available actions  [[2 3]
 [4 3]
 [0 3]
 [4

 [0 0]]  Selected action  [2 4]
episode: 28   score: -62.0   memory length: 4096   epsilon: 0.9186748739516929
Exploring Available actions  [[2 0]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 28   score: -67.0   memory length: 4096   epsilon: 0.9186748739516929
Exploring Available actions  [[0 4]
 [3 4]
 [3 2]
 [1 2]
 [0 2]
 [1 4]
 [4 1]
 [2 0]
 [4 2]
 [2 3]
 [1 3]
 [3 1]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 28   score: -72.0   memory length: 4096   epsilon: 0.9186748739516929
Exploring Available actions  [[3 4]
 [2 0]
 [2 3]
 [0 2]
 [4 0]
 [4 3]
 [1 2]
 [4 1]
 [2 4]
 [1 0]
 [0 3]
 [0 4]
 [3 2]
 [2 1]
 [1 4]
 [0 0]]  Selected action  [3 4]
episode: 28   score: -78.0   memory length: 4096   epsilon: 0.9186748739516929
Exploring Available actions  [[2 4]
 [4 1]
 [3 4]
 [2 3]
 [3 0]
 [1 4]
 [0 2]
 [4 2]
 [1 0]
 [2 1]
 [1 3]
 [1 2]
 [0 4]
 [0 0]]  Selected action  [2 4]
episode: 28   score: -87.0   memory length: 4096   epsilon: 0.9186748739516929
Exploring Available actions 

episode: 29   score: 12.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[1 0]
 [0 0]]  Selected action  [1 0]
episode: 29   score: 8.0   memory length: 4096   epsilon: 0.9157933004517167
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [4 1]
 [0 0]]  Selected action  [0 4]
episode: 29   score: 12.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[3 1]
 [0 1]
 [2 0]
 [2 1]
 [2 3]
 [1 4]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 29   score: 31.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[1 2]
 [2 1]
 [3 4]
 [1 0]
 [3 1]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [1 0]
episode: 29   score: 19.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[0 1]
 [3 4]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 29   score: 14.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[1 0]
 [2 3]
 [0 0]]  Sel

Exploring Available actions  [[2 4]
 [4 2]
 [2 3]
 [2 1]
 [4 1]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [4 2]
episode: 29   score: 228.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[1 3]
 [0 2]
 [0 3]
 [0 0]]  Selected action  [0 2]
episode: 29   score: 226.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[1 4]
 [0 4]
 [4 1]
 [4 3]
 [2 0]
 [0 0]]  Selected action  [2 0]
episode: 29   score: 246.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[0 2]
 [2 1]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 29   score: 241.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [0 0]
episode: 29   score: 236.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[4 2]
 [2 0]
 [0 3]
 [0 1]
 [2 1]
 [2 3]
 [0 0]]  Selected action  [4 2]
episode: 29   score: 235.0   memory length: 4096   epsilon: 0.91

episode: 29   score: 138.0   memory length: 4096   epsilon: 0.9157933004517167
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [4 2]
 [0 0]]  Selected action  [2 1]
episode: 29   score: 132.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[3 0]
 [4 3]
 [0 4]
 [2 0]
 [1 3]
 [1 2]
 [1 0]
 [3 2]
 [2 1]
 [0 2]
 [0 0]]  Selected action  [0 0]
episode: 29   score: 127.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[2 4]
 [1 2]
 [3 0]
 [2 1]
 [4 3]
 [4 1]
 [1 3]
 [3 2]
 [0 4]
 [0 2]
 [0 3]
 [1 0]
 [0 0]]  Selected action  [3 0]
episode: 29   score: 129.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 3]
episode: 29   score: 133.0   memory length: 4096   epsilon: 0.9157933004517167
Exploring Available actions  [[3 2]
 [1 4]
 [2 3]
 [2 0]
 [0 1]
 [4 1]
 [0 0]]  Selected action  [2 3]
episode: 29   score: 134.0   memory length: 4096  

episode: 30   score: 35.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[4 1]
 [2 0]
 [4 2]
 [2 4]
 [1 0]
 [3 2]
 [3 0]
 [3 1]
 [1 4]
 [0 0]]  Selected action  [0 0]
episode: 30   score: 30.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[4 2]
 [4 0]
 [1 3]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 30   score: 54.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[4 2]
 [4 0]
 [0 0]]  Selected action  [4 2]
episode: 30   score: 69.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[3 4]
 [4 1]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 30   score: 62.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[0 2]
 [4 0]
 [4 2]
 [2 0]
 [3 0]
 [2 4]
 [4 3]
 [2 1]
 [2 3]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 30   score: 70.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[1 4]
 

episode: 30   score: -97.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[2 4]
 [0 4]
 [1 0]
 [0 0]]  Selected action  [0 4]
episode: 30   score: -93.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[3 0]
 [1 3]
 [2 1]
 [2 4]
 [3 1]
 [2 3]
 [1 0]
 [2 0]
 [0 3]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 30   score: -98.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[3 1]
 [4 3]
 [1 3]
 [2 4]
 [2 0]
 [2 3]
 [0 1]
 [3 2]
 [1 4]
 [0 0]]  Selected action  [3 2]
episode: 30   score: -92.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[1 3]
 [0 0]]  Selected action  [0 0]
episode: 30   score: -97.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[4 2]
 [3 2]
 [0 2]
 [0 0]]  Selected action  [4 2]
episode: 30   score: -97.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[0 1]
 [4 2]
 [

episode: 30   score: -90.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [0 0]
episode: 30   score: -95.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[0 1]
 [1 4]
 [0 0]]  Selected action  [0 1]
episode: 30   score: -83.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[0 4]
 [1 2]
 [2 4]
 [0 1]
 [2 3]
 [1 4]
 [4 1]
 [3 4]
 [0 3]
 [1 0]
 [3 1]
 [2 0]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 30   score: -78.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[3 1]
 [3 0]
 [4 1]
 [4 3]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 30   score: -83.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 30   score: -88.0   memory length: 4096   epsilon: 0.912920765476763
Exploring Available actions  [[1 4]
 [3 2]
 [1 3]
 [2 1]
 [2 4]
 [4 1]
 [

episode: 31   score: 30.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[0 1]
 [1 4]
 [0 3]
 [0 2]
 [2 0]
 [3 0]
 [0 0]]  Selected action  [0 3]
episode: 31   score: 24.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[1 0]
 [0 3]
 [0 1]
 [1 3]
 [3 2]
 [2 0]
 [4 2]
 [1 2]
 [2 3]
 [2 4]
 [3 4]
 [0 0]]  Selected action  [0 0]
episode: 31   score: 19.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[3 1]
 [0 3]
 [2 0]
 [4 0]
 [4 2]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [3 1]
episode: 31   score: 27.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[2 4]
 [4 3]
 [2 1]
 [2 0]
 [1 3]
 [1 0]
 [0 2]
 [1 4]
 [4 2]
 [0 4]
 [0 0]]  Selected action  [2 0]
episode: 31   score: 24.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [0 0]
episode: 31   score: 19.0   memory length: 4096   epsilon: 0

episode: 31   score: 47.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[4 3]
 [1 4]
 [1 0]
 [4 1]
 [3 0]
 [0 1]
 [4 2]
 [0 0]]  Selected action  [1 0]
episode: 31   score: 75.0   memory length: 4096   epsilon: 0.9100572406760248
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 2]
 [4 0]
 [0 0]]  Selected action  [1 2]
episode: 31   score: 80.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[2 3]
 [4 1]
 [0 1]
 [2 1]
 [1 4]
 [3 0]
 [0 0]]  Selected action  [2 3]
episode: 31   score: 92.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[2 3]
 [1 0]
 [2 0]
 [3 1]
 [3 4]
 [0 0]]  Selected action  [2 3]
episode: 31   score: 93.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[2 1]
 [3 4]
 [2 3]
 [2 4]
 [0 0]]  Selected action  [3 4]
episode: 31   score: 109.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actio

episode: 31   score: -73.0   memory length: 4096   epsilon: 0.9100572406760248
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [1 3]
 [0 0]]  Selected action  [3 2]
episode: 31   score: -47.0   memory length: 4096   epsilon: 0.9100572406760248
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 1]
 [0 3]
 [0 0]]  Selected action  [3 1]
episode: 31   score: -69.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[1 0]
 [0 2]
 [0 4]
 [4 0]
 [1 2]
 [2 1]
 [3 2]
 [0 0]]  Selected action  [0 4]
episode: 31   score: -100.0   memory length: 4096   epsilon: 0.9100572406760248
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [0 2]
 [0 1]
 [4 0]
 [4 2]
 [2 3]
 [3 0]
 [4 3]
 [1 2]
 [0 0]]  Selected action  [1 0]
episode: 31   score: -88.0   memory length: 4096   epsilon: 0.9100572406760248
Exploring Available actions  [[1 0]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 31   score: -85.

episode: 32   score: 30.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[1 4]
 [3 2]
 [0 0]]  Selected action  [1 4]
episode: 32   score: -5.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[3 4]
 [0 4]
 [0 1]
 [1 4]
 [0 3]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 32   score: -5.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[3 0]
 [1 4]
 [2 1]
 [3 1]
 [0 4]
 [4 3]
 [3 2]
 [0 0]]  Selected action  [2 1]
episode: 32   score: 2.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[1 4]
 [4 1]
 [4 3]
 [3 0]
 [2 0]
 [2 3]
 [1 0]
 [0 3]
 [4 2]
 [0 1]
 [3 2]
 [2 4]
 [0 4]
 [3 1]
 [0 0]]  Selected action  [3 0]
episode: 32   score: -4.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[4 0]
 [0 0]]  Selected action  [0 0]
episode: 32   score: -9.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Availabl

 [0 0]]  Selected action  [4 2]
episode: 32   score: -173.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[0 1]
 [2 0]
 [4 1]
 [4 3]
 [0 0]]  Selected action  [2 0]
episode: 32   score: -153.0   memory length: 4096   epsilon: 0.9072026977876215
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [3 0]
 [3 2]
 [1 3]
 [2 1]
 [0 0]]  Selected action  [4 3]
episode: 32   score: -171.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[3 1]
 [1 3]
 [3 2]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 32   score: -188.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[3 0]
 [1 0]
 [4 1]
 [4 2]
 [0 2]
 [1 4]
 [4 0]
 [0 1]
 [2 0]
 [3 2]
 [0 4]
 [1 3]
 [2 1]
 [2 4]
 [0 0]]  Selected action  [1 0]
episode: 32   score: -176.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[3 4]
 [0 2]
 [3 1]
 [0 0]]  Selected action  [0 0]
episode: 32

episode: 32   score: -87.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[2 1]
 [2 0]
 [0 2]
 [4 0]
 [4 1]
 [3 1]
 [0 4]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 32   score: -71.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[4 1]
 [1 3]
 [2 0]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 32   score: -77.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[3 4]
 [4 3]
 [4 2]
 [2 0]
 [1 4]
 [2 3]
 [1 3]
 [4 0]
 [0 0]]  Selected action  [2 3]
episode: 32   score: -90.0   memory length: 4096   epsilon: 0.9072026977876215
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [2 4]
 [1 4]
 [0 3]
 [3 0]
 [0 1]
 [0 0]]  Selected action  [4 3]
episode: 32   score: -112.0   memory length: 4096   epsilon: 0.9072026977876215
Exploring Available actions  [[2 4]
 [3 2]
 [1 0]
 [0 3]
 [4 3]
 [4 0]
 [1 3]
 [4 2]
 [2 3]
 [0 0]]  Selected action  [4 3]
ep

episode: 33   score: -17.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[4 2]
 [0 3]
 [1 4]
 [1 0]
 [0 0]]  Selected action  [0 3]
episode: 33   score: -17.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[1 0]
 [2 1]
 [1 4]
 [0 2]
 [1 2]
 [1 3]
 [4 2]
 [3 0]
 [4 3]
 [0 0]]  Selected action  [0 2]
episode: 33   score: 7.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[1 2]
 [2 3]
 [1 0]
 [1 4]
 [0 0]]  Selected action  [1 2]
episode: 33   score: 13.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[0 4]
 [4 1]
 [1 0]
 [2 4]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 33   score: 2.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[1 0]
 [2 4]
 [0 0]]  Selected action  [1 0]
episode: 33   score: -1.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[0 2]
 [0 0]]  Selected actio

episode: 33   score: -65.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[3 2]
 [2 4]
 [2 1]
 [0 1]
 [4 2]
 [2 0]
 [0 0]]  Selected action  [2 0]
episode: 33   score: -75.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[1 4]
 [4 1]
 [0 0]]  Selected action  [1 4]
episode: 33   score: -82.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[1 2]
 [0 2]
 [1 4]
 [4 3]
 [2 1]
 [3 0]
 [2 0]
 [3 1]
 [0 0]]  Selected action  [2 1]
episode: 33   score: -52.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[2 3]
 [0 1]
 [4 3]
 [1 2]
 [1 0]
 [1 4]
 [3 4]
 [0 4]
 [0 2]
 [2 0]
 [1 3]
 [0 3]
 [0 0]]  Selected action  [2 3]
episode: 33   score: -65.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[2 0]
 [0 2]
 [3 2]
 [4 3]
 [1 3]
 [4 1]
 [0 0]]  Selected action  [1 3]
episode: 33   score: -59.0   memory length: 4096   epsilon: 0.

episode: 33   score: -70.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[1 3]
 [3 2]
 [3 1]
 [4 2]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 33   score: -64.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[4 1]
 [2 0]
 [1 0]
 [0 0]]  Selected action  [4 1]
episode: 33   score: -84.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[4 1]
 [0 2]
 [1 4]
 [3 2]
 [3 4]
 [1 3]
 [4 0]
 [0 3]
 [3 0]
 [4 2]
 [2 0]
 [2 4]
 [1 2]
 [0 0]]  Selected action  [0 2]
episode: 33   score: -84.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[4 3]
 [3 0]
 [2 3]
 [0 0]]  Selected action  [4 3]
episode: 33   score: -101.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[1 2]
 [3 2]
 [1 0]
 [0 0]]  Selected action  [3 2]
episode: 33   score: -77.0   memory length: 4096   epsilon: 0.9043571086383213
Exploring Available actions  [[

episode: 34   score: -1.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[0 3]
 [2 4]
 [3 4]
 [0 1]
 [4 3]
 [4 0]
 [3 2]
 [0 0]]  Selected action  [2 4]
episode: 34   score: -24.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[4 3]
 [2 1]
 [1 2]
 [0 1]
 [0 4]
 [0 3]
 [3 2]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 34   score: -29.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[3 0]
 [0 3]
 [4 2]
 [4 0]
 [1 2]
 [4 1]
 [4 3]
 [1 0]
 [3 4]
 [2 0]
 [1 3]
 [0 0]]  Selected action  [3 0]
episode: 34   score: -35.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[3 0]
 [0 3]
 [1 3]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 34   score: -40.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[4 1]
 [3 1]
 [0 0]]  Selected action  [4 1]
episode: 34   score: -27.0   memory length: 4096   epsilon: 0.901520445143261

episode: 34   score: -118.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[1 4]
 [3 4]
 [4 2]
 [2 3]
 [0 2]
 [3 0]
 [4 3]
 [2 4]
 [1 3]
 [0 0]]  Selected action  [3 0]
episode: 34   score: -96.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[2 3]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 34   score: -132.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[2 1]
 [2 4]
 [1 2]
 [4 1]
 [3 2]
 [3 4]
 [1 3]
 [1 4]
 [3 1]
 [2 0]
 [2 3]
 [4 3]
 [0 0]]  Selected action  [1 3]
episode: 34   score: -128.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[4 1]
 [4 3]
 [3 2]
 [2 1]
 [0 4]
 [2 3]
 [0 3]
 [4 0]
 [1 4]
 [0 0]]  Selected action  [4 1]
episode: 34   score: -136.0   memory length: 4096   epsilon: 0.9015204451432617
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [3 1]
 [4 1]
 [0 2]
 [1 4]


episode: 34   score: 52.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[0 4]
 [4 1]
 [2 4]
 [4 2]
 [2 0]
 [3 1]
 [0 0]]  Selected action  [2 4]
episode: 34   score: 52.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[1 4]
 [3 0]
 [0 2]
 [1 3]
 [3 4]
 [0 4]
 [0 0]]  Selected action  [3 4]
episode: 34   score: 46.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[0 2]
 [2 4]
 [1 0]
 [2 3]
 [2 0]
 [3 2]
 [3 0]
 [0 0]]  Selected action  [0 2]
episode: 34   score: 69.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[3 4]
 [2 1]
 [3 0]
 [2 4]
 [0 0]]  Selected action  [2 1]
episode: 34   score: 85.0   memory length: 4096   epsilon: 0.9015204451432617
Exploring Available actions  [[2 1]
 [3 4]
 [1 4]
 [4 1]
 [2 4]
 [0 3]
 [3 0]
 [3 1]
 [3 2]
 [2 0]
 [1 3]
 [0 4]
 [4 2]
 [1 2]
 [0 0]]  Selected action  [2 4]
episode: 34   score: 75.0   memory length: 409

episode: 35   score: -82.0   memory length: 4096   epsilon: 0.8986926793056731
Exploring Available actions  [[2 3]
 [3 2]
 [3 0]
 [1 4]
 [0 0]]  Selected action  [3 0]
episode: 35   score: -83.0   memory length: 4096   epsilon: 0.8986926793056731
Exploring Available actions  [[3 4]
 [4 1]
 [0 0]]  Selected action  [3 4]
episode: 35   score: -72.0   memory length: 4096   epsilon: 0.8986926793056731
Exploring Available actions  [[0 4]
 [3 4]
 [3 0]
 [4 1]
 [0 2]
 [0 1]
 [0 0]]  Selected action  [3 0]
episode: 35   score: -66.0   memory length: 4096   epsilon: 0.8986926793056731
Exploring Available actions  [[2 0]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 35   score: -83.0   memory length: 4096   epsilon: 0.8986926793056731
Exploring Available actions  [[0 3]
 [3 4]
 [0 4]
 [0 2]
 [2 1]
 [1 3]
 [2 3]
 [4 0]
 [0 0]]  Selected action  [1 3]
episode: 35   score: -85.0   memory length: 4096   epsilon: 0.8986926793056731
Exploring Available actions  [[2 0]
 [1 4]
 [0 4]
 [1 0]
 [3 2]
 [4

episode: 35   score: -65.0   memory length: 4096   epsilon: 0.8986926793056731
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 4]
 [3 1]
 [2 1]
 [4 1]
 [0 3]
 [4 2]
 [1 2]
 [2 0]
 [0 1]
 [3 0]
 [1 0]
 [3 2]
 [4 0]
 [1 3]
 [2 3]
 [0 0]]  Selected action  [3 4]
episode: 35   score: -61.0   memory length: 4096   epsilon: 0.8986926793056731
Exploring Available actions  [[1 3]
 [4 2]
 [4 1]
 [0 0]]  Selected action  [1 3]
episode: 35   score: -56.0   memory length: 4096   epsilon: 0.8986926793056731
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 4]
 [2 1]
 [0 4]
 [0 2]
 [2 0]
 [4 1]
 [1 0]
 [0 3]
 [0 0]]  Selected action  [3 4]
episode: 35   score: -36.0   memory length: 4096   epsilon: 0.8986926793056731
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [3 2]
 [4 0]
 [2 4]
 [4 3]
 [0 4]
 [1 2]
 [0 0]]  Selected action  [0 3]
episode: 35   score: -52.0   memory le

episode: 35   score: -136.0   memory length: 4096   epsilon: 0.8986926793056731
Episode : 36
Exploring Available actions  [[4 0]
 [4 2]
 [3 0]
 [0 2]
 [1 2]
 [4 3]
 [2 1]
 [4 1]
 [2 0]
 [0 1]
 [0 0]]  Selected action  [2 1]
episode: 36   score: -33.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[4 3]
 [4 1]
 [0 4]
 [3 4]
 [4 0]
 [1 0]
 [0 1]
 [4 2]
 [3 0]
 [1 3]
 [0 3]
 [2 0]
 [0 0]]  Selected action  [4 1]
episode: 36   score: -33.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[4 1]
 [3 0]
 [3 2]
 [4 3]
 [1 3]
 [0 4]
 [4 0]
 [3 4]
 [1 4]
 [0 2]
 [1 2]
 [2 3]
 [4 2]
 [2 0]
 [0 0]]  Selected action  [2 3]
episode: 36   score: -76.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[3 0]
 [4 2]
 [1 4]
 [0 1]
 [2 0]
 [0 3]
 [3 1]
 [1 3]
 [2 4]
 [0 0]]  Selected action  [3 0]
episode: 36   score: -44.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[4

episode: 36   score: -148.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[1 4]
 [2 1]
 [1 3]
 [3 2]
 [2 3]
 [3 1]
 [4 0]
 [1 2]
 [2 0]
 [0 4]
 [0 0]]  Selected action  [1 2]
episode: 36   score: -140.0   memory length: 4096   epsilon: 0.8958737832166025
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [2 1]
 [0 0]]  Selected action  [4 1]
episode: 36   score: -138.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[3 1]
 [2 0]
 [1 4]
 [0 1]
 [0 3]
 [1 3]
 [4 1]
 [0 2]
 [3 0]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 36   score: -143.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[0 3]
 [0 2]
 [2 1]
 [1 2]
 [0 1]
 [2 3]
 [3 0]
 [4 3]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 36   score: -148.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[4 1]
 [1 0]
 [3 2]
 [2 3]
 [1 4]
 [3 0]
 [1 3]
 [1 2]
 [0 0]]  Selected act

episode: 36   score: -256.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[0 3]
 [3 4]
 [2 3]
 [0 2]
 [2 1]
 [1 2]
 [3 2]
 [3 0]
 [2 4]
 [0 1]
 [4 1]
 [0 0]]  Selected action  [0 3]
episode: 36   score: -266.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[2 1]
 [2 3]
 [4 2]
 [2 0]
 [0 2]
 [1 0]
 [4 3]
 [0 4]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 36   score: -271.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[2 1]
 [1 3]
 [0 2]
 [2 4]
 [4 2]
 [4 1]
 [3 2]
 [0 0]]  Selected action  [4 1]
episode: 36   score: -292.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[0 1]
 [0 3]
 [4 2]
 [0 4]
 [3 0]
 [1 0]
 [0 2]
 [2 3]
 [2 0]
 [3 4]
 [3 1]
 [2 4]
 [4 1]
 [1 4]
 [0 0]]  Selected action  [2 4]
episode: 36   score: -302.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[3 1]
 [2 3]
 [2 4]
 [1 0]
 [4 0]
 [1 2

episode: 36   score: -163.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[3 1]
 [4 0]
 [1 4]
 [2 0]
 [0 3]
 [1 2]
 [1 3]
 [0 2]
 [4 3]
 [3 4]
 [2 4]
 [3 2]
 [3 0]
 [0 4]
 [0 1]
 [0 0]]  Selected action  [3 0]
episode: 36   score: -156.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [3 2]
episode: 36   score: -150.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[4 1]
 [4 2]
 [2 1]
 [0 1]
 [3 0]
 [4 3]
 [1 3]
 [0 0]]  Selected action  [4 3]
episode: 36   score: -152.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[1 0]
 [0 2]
 [4 1]
 [4 3]
 [2 1]
 [0 4]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 36   score: -154.0   memory length: 4096   epsilon: 0.8958737832166025
Exploring Available actions  [[2 1]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 36   score: -159.0   memory length: 4096   epsilon: 0.89587378

episode: 37   score: 30.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[0 1]
 [2 0]
 [2 1]
 [4 1]
 [4 0]
 [3 0]
 [2 4]
 [2 3]
 [3 4]
 [0 0]]  Selected action  [4 0]
episode: 37   score: 29.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[1 0]
 [0 0]]  Selected action  [1 0]
episode: 37   score: 26.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[4 2]
 [2 4]
 [0 0]]  Selected action  [4 2]
episode: 37   score: 27.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[2 4]
 [2 0]
 [0 0]]  Selected action  [2 0]
episode: 37   score: 31.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[4 1]
 [4 3]
 [3 2]
 [2 3]
 [0 0]]  Selected action  [0 0]
episode: 37   score: 26.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[3 4]
 [2 1]
 [0 0]]  Selected action  [3 4]
episode: 37   score: 32.0 

episode: 37   score: 156.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[1 4]
 [2 1]
 [0 2]
 [3 2]
 [4 3]
 [4 1]
 [2 0]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 37   score: 134.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[0 2]
 [1 2]
 [4 1]
 [2 1]
 [3 4]
 [0 3]
 [1 0]
 [3 0]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 37   score: 133.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[3 2]
 [3 1]
 [0 3]
 [0 4]
 [2 0]
 [3 0]
 [0 0]]  Selected action  [3 2]
episode: 37   score: 110.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[4 1]
 [2 3]
 [3 4]
 [2 0]
 [4 3]
 [0 0]]  Selected action  [2 0]
episode: 37   score: 130.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring Available actions  [[4 3]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 37   score: 124.0   memory length: 4096   epsilon: 0.8930637290546378
Exploring A

episode: 38   score: 14.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[1 0]
 [3 1]
 [2 0]
 [1 4]
 [0 3]
 [0 1]
 [2 1]
 [4 2]
 [4 1]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 38   score: 32.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[4 2]
 [3 1]
 [1 3]
 [2 3]
 [0 0]]  Selected action  [3 1]
episode: 38   score: 32.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[2 4]
 [2 3]
 [3 1]
 [3 2]
 [3 0]
 [2 1]
 [0 4]
 [1 0]
 [0 3]
 [0 0]]  Selected action  [2 3]
episode: 38   score: 54.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[3 1]
 [0 3]
 [0 2]
 [0 1]
 [3 2]
 [3 0]
 [2 4]
 [4 1]
 [4 2]
 [0 0]]  Selected action  [4 1]
episode: 38   score: 55.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[1 3]
 [0 1]
 [0 4]
 [2 0]
 [3 2]
 [3 1]
 [1 2]
 [4 2]
 [1 4]
 [1 0]
 [2 3]
 [4 3]
 [0 2]
 [0 0]]  Selected acti

episode: 38   score: -62.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 0]
episode: 38   score: -67.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[1 2]
 [2 3]
 [4 0]
 [2 1]
 [0 0]]  Selected action  [2 1]
episode: 38   score: -44.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[2 3]
 [4 3]
 [1 3]
 [1 4]
 [3 4]
 [1 0]
 [4 0]
 [3 1]
 [3 2]
 [0 0]]  Selected action  [1 0]
episode: 38   score: -24.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[4 1]
 [0 0]]  Selected action  [4 1]
episode: 38   score: -10.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[2 4]
 [4 0]
 [0 2]
 [3 1]
 [1 4]
 [3 4]
 [2 0]
 [4 1]
 [4 3]
 [4 2]
 [0 4]
 [3 2]
 [1 3]
 [0 0]]  Selected action  [3 1]
episode: 38   score: -26.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actio

episode: 38   score: -156.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[4 1]
 [0 2]
 [0 0]]  Selected action  [0 0]
episode: 38   score: -161.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[3 4]
 [0 1]
 [0 0]]  Selected action  [0 0]
episode: 38   score: -166.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[4 3]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 38   score: -171.0   memory length: 4096   epsilon: 0.8902624890856332
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [4 1]
 [0 4]
 [0 0]]  Selected action  [1 3]
episode: 38   score: -181.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[3 2]
 [4 0]
 [1 0]
 [2 0]
 [0 4]
 [3 1]
 [2 1]
 [1 2]
 [2 3]
 [0 2]
 [0 0]]  Selected action  [3 1]
episode: 38   score: -181.0   memory length: 4096   epsilon: 0.8902624890856332
Exploring Available actions  [[1 3]
 [4 2]
 [2

episode: 39   score: -72.0   memory length: 4096   epsilon: 0.8874700356624355
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [4 2]
 [0 0]]  Selected action  [1 0]
episode: 39   score: -74.0   memory length: 4096   epsilon: 0.8874700356624355
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [0 0]]  Selected action  [4 1]
episode: 39   score: -98.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[0 1]
 [3 2]
 [1 4]
 [1 0]
 [3 4]
 [3 0]
 [0 3]
 [2 1]
 [0 4]
 [2 4]
 [3 1]
 [0 2]
 [1 2]
 [2 0]
 [4 1]
 [0 0]]  Selected action  [3 1]
episode: 39   score: -99.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[1 3]
 [2 0]
 [3 4]
 [0 4]
 [1 0]
 [4 1]
 [3 1]
 [0 0]]  Selected action  [0 0]
episode: 39   score: -104.0   memory length: 4096   epsilon: 0.8874700356624355
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [4 2]
 [1 4]


episode: 39   score: -242.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[2 0]
 [4 0]
 [2 1]
 [3 0]
 [1 2]
 [0 4]
 [1 4]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 39   score: -247.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[0 3]
 [2 4]
 [1 3]
 [3 0]
 [1 4]
 [0 2]
 [3 2]
 [2 1]
 [3 4]
 [0 4]
 [0 0]]  Selected action  [3 4]
episode: 39   score: -223.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[1 4]
 [0 2]
 [4 3]
 [4 1]
 [1 2]
 [2 4]
 [1 3]
 [0 0]]  Selected action  [1 4]
episode: 39   score: -226.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[1 2]
 [3 1]
 [2 3]
 [0 3]
 [0 1]
 [4 1]
 [3 4]
 [3 0]
 [2 1]
 [0 0]]  Selected action  [0 1]
episode: 39   score: -206.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[3 1]
 [3 0]
 [2 1]
 [0 2]
 [1 3]
 [4 0]
 [3 4]
 [4 1]
 [2 0]
 [0 1]
 [3 2]
 [0 0]]  Sel

episode: 39   score: -144.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[2 0]
 [4 0]
 [2 1]
 [4 1]
 [0 0]]  Selected action  [2 1]
episode: 39   score: -120.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[3 0]
 [0 2]
 [3 1]
 [1 2]
 [4 0]
 [4 2]
 [4 1]
 [0 1]
 [1 4]
 [3 4]
 [1 3]
 [3 2]
 [0 4]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 39   score: -131.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[3 0]
 [2 3]
 [1 4]
 [3 4]
 [0 0]]  Selected action  [2 3]
episode: 39   score: -163.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[1 4]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 39   score: -151.0   memory length: 4096   epsilon: 0.8874700356624355
Exploring Available actions  [[0 4]
 [0 2]
 [1 0]
 [1 4]
 [4 3]
 [0 3]
 [2 4]
 [0 0]]  Selected action  [0 3]
episode: 39   score: -116.0   memory length: 4096   epsilon: 0.887470035662435

 [0 0]]  Selected action  [0 1]
episode: 40   score: -24.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[3 1]
 [3 4]
 [2 4]
 [1 0]
 [4 0]
 [0 2]
 [3 0]
 [0 4]
 [0 0]]  Selected action  [0 2]
episode: 40   score: -24.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [2 4]
episode: 40   score: -4.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 0]
 [1 4]
 [4 0]
 [2 4]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 40   score: 5.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 4]
 [2 3]
 [1 0]
 [0 0]]  Selected action  [2 4]
episode: 40   score: 5.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[1 0]
 [0 2]
 [1 4]
 [4 3]
 [2 4]
 [3 2]
 [0 4]
 [3 1]
 [0 0]]  Selected action  [4 3]
episode: 40   score: 17.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Ava

episode: 40   score: -28.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[3 0]
 [3 2]
 [2 3]
 [4 3]
 [3 1]
 [0 0]]  Selected action  [3 0]
episode: 40   score: -31.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [0 0]
episode: 40   score: -36.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[1 2]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 40   score: -19.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[3 2]
 [1 0]
 [3 4]
 [1 2]
 [2 3]
 [0 0]]  Selected action  [3 2]
episode: 40   score: -11.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[0 4]
 [3 4]
 [3 0]
 [0 0]]  Selected action  [0 4]
episode: 40   score: -17.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[4 3]
 [3 4]
 [3 2]
 [1 0]
 [4 1]
 [2 0]
 [2 1]
 [1 3]
 [0 0]]  Selected action

episode: 40   score: 145.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[1 2]
 [2 3]
 [0 1]
 [4 1]
 [3 4]
 [2 1]
 [4 0]
 [0 3]
 [1 0]
 [3 0]
 [4 3]
 [2 0]
 [0 0]]  Selected action  [4 0]
episode: 40   score: 149.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 1]
 [3 1]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 40   score: 144.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[3 0]
 [2 3]
 [0 0]]  Selected action  [3 0]
episode: 40   score: 136.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 1]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 40   score: 134.0   memory length: 4096   epsilon: 0.8846863412246118
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 40   score: 158.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[3 1]
 [3 2]
 [3 0]
 

episode: 40   score: 81.0   memory length: 4096   epsilon: 0.8846863412246118
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [2 1]
 [0 4]
 [1 2]
 [2 4]
 [2 3]
 [4 0]
 [0 2]
 [2 0]
 [0 0]]  Selected action  [1 3]
episode: 40   score: 81.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 1]
 [4 3]
 [1 0]
 [1 3]
 [0 3]
 [2 0]
 [3 1]
 [0 4]
 [2 4]
 [0 0]]  Selected action  [0 4]
episode: 40   score: 85.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[1 2]
 [0 3]
 [0 4]
 [3 0]
 [2 3]
 [0 2]
 [1 4]
 [3 4]
 [0 1]
 [0 0]]  Selected action  [2 3]
episode: 40   score: 117.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[0 4]
 [3 1]
 [1 2]
 [1 3]
 [4 2]
 [2 3]
 [3 2]
 [3 4]
 [4 1]
 [4 3]
 [0 0]]  Selected action  [0 4]
episode: 40   score: 106.0   memory length: 4096   epsilon: 0.8846863412246118
Exploring Available actions  [[2 4]
 [4 2]
 [1 4]
 

episode: 41   score: 17.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[1 3]
 [0 0]]  Selected action  [1 3]
episode: 41   score: 11.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[4 0]
 [3 2]
 [3 0]
 [0 4]
 [3 1]
 [1 2]
 [0 2]
 [0 0]]  Selected action  [3 0]
episode: 41   score: 39.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[3 2]
 [2 4]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 41   score: 34.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[4 2]
 [0 0]]  Selected action  [0 0]
episode: 41   score: 29.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[4 0]
 [0 3]
 [4 2]
 [0 0]]  Selected action  [4 0]
episode: 41   score: 29.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[3 4]
 [0 0]]  Selected action  [3 4]
episode: 41   score: 44.0   memory length: 4096   epsi

episode: 41   score: -21.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[3 1]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 41   score: -21.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[0 2]
 [3 4]
 [4 1]
 [0 4]
 [2 3]
 [2 1]
 [1 3]
 [1 4]
 [0 0]]  Selected action  [0 2]
episode: 41   score: -10.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[0 1]
 [2 0]
 [1 0]
 [4 0]
 [4 1]
 [3 2]
 [2 4]
 [3 1]
 [0 4]
 [0 0]]  Selected action  [1 0]
episode: 41   score: -39.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[2 3]
 [3 4]
 [4 3]
 [0 0]]  Selected action  [3 4]
episode: 41   score: -35.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[3 0]
 [1 0]
 [3 4]
 [0 2]
 [1 4]
 [4 2]
 [0 4]
 [4 0]
 [1 2]
 [0 0]]  Selected action  [1 4]
episode: 41   score: -37.0   memory length: 4096   epsilon: 0.8819113782981763
Expl

episode: 41   score: -3.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[0 3]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 41   score: -8.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[4 2]
 [0 2]
 [1 4]
 [0 0]]  Selected action  [0 2]
episode: 41   score: -14.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[2 3]
 [4 3]
 [4 2]
 [0 0]]  Selected action  [2 3]
episode: 41   score: -2.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[2 1]
 [4 2]
 [3 2]
 [2 3]
 [1 4]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 41   score: -12.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[4 2]
 [1 0]
 [3 4]
 [1 4]
 [0 4]
 [0 1]
 [3 2]
 [0 0]]  Selected action  [0 4]
episode: 41   score: -16.0   memory length: 4096   epsilon: 0.8819113782981763
Exploring Available actions  [[4 2]
 [4 3]
 [0 2]
 [2 4]
 [0 3]
 [4 0]
 [1 2]

episode: 42   score: -112.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[4 3]
 [4 1]
 [1 3]
 [4 2]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 42   score: -117.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[0 1]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 42   score: -109.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[3 2]
 [3 1]
 [4 2]
 [2 0]
 [0 0]]  Selected action  [3 2]
episode: 42   score: -101.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[2 0]
 [3 1]
 [1 2]
 [1 0]
 [4 0]
 [0 2]
 [0 0]]  Selected action  [2 0]
episode: 42   score: -93.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[4 2]
 [0 0]]  Selected action  [4 2]
episode: 42   score: -108.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[2 4]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 42   s

episode: 42   score: -156.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[2 0]
 [0 1]
 [1 2]
 [2 1]
 [3 2]
 [4 3]
 [4 2]
 [3 4]
 [3 0]
 [0 0]]  Selected action  [0 1]
episode: 42   score: -143.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[4 1]
 [2 1]
 [3 2]
 [0 3]
 [2 3]
 [4 3]
 [1 0]
 [3 1]
 [2 4]
 [1 2]
 [3 4]
 [1 3]
 [0 0]]  Selected action  [1 2]
episode: 42   score: -99.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[2 3]
 [4 0]
 [0 2]
 [4 2]
 [0 0]]  Selected action  [0 2]
episode: 42   score: -125.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[2 1]
 [0 0]]  Selected action  [2 1]
episode: 42   score: -121.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[3 1]
 [0 4]
 [4 3]
 [1 2]
 [0 3]
 [3 0]
 [0 2]
 [2 3]
 [4 0]
 [2 1]
 [2 4]
 [3 2]
 [1 3]
 [4 1]
 [2 0]
 [0 0]]  Selected action  [0 4]
episode: 

episode: 42   score: -123.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[1 0]
 [0 4]
 [4 1]
 [0 1]
 [2 0]
 [4 0]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 42   score: -168.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[1 2]
 [2 4]
 [0 0]]  Selected action  [1 2]
episode: 42   score: -158.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[2 4]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 42   score: -154.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[4 0]
 [4 1]
 [2 4]
 [4 2]
 [0 0]]  Selected action  [4 0]
episode: 42   score: -155.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[3 4]
 [0 0]]  Selected action  [3 4]
episode: 42   score: -142.0   memory length: 4096   epsilon: 0.8791451194953203
Exploring Available actions  [[4 0]
 [1 0]
 [3 4]
 [0 4]
 [1 4]
 [0 2]
 [2 3]
 [0 0]]  Selected action 

 [0 0]]  Selected action  [2 3]
episode: 43   score: -37.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[2 3]
 [1 0]
 [2 0]
 [3 1]
 [2 1]
 [4 2]
 [1 3]
 [0 3]
 [3 4]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 43   score: -48.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[0 3]
 [4 3]
 [0 1]
 [0 4]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 43   score: -74.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[3 4]
 [1 0]
 [0 3]
 [0 1]
 [4 1]
 [3 0]
 [2 4]
 [4 0]
 [0 0]]  Selected action  [0 3]
episode: 43   score: -72.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[0 1]
 [4 1]
 [3 0]
 [1 2]
 [0 2]
 [3 1]
 [1 0]
 [4 0]
 [0 0]]  Selected action  [1 2]
episode: 43   score: -69.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[4 3]
 [3 1]
 [0 0]]  Selected action  [4 3]
episode: 43   score: -59.0   memory l

episode: 43   score: -126.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[1 2]
 [4 1]
 [2 4]
 [2 1]
 [1 3]
 [1 0]
 [3 1]
 [4 2]
 [0 3]
 [0 0]]  Selected action  [1 2]
episode: 43   score: -91.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[3 4]
 [2 0]
 [4 2]
 [1 4]
 [4 1]
 [0 0]]  Selected action  [3 4]
episode: 43   score: -86.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[0 2]
 [3 0]
 [1 3]
 [4 3]
 [1 2]
 [4 2]
 [0 0]]  Selected action  [4 3]
episode: 43   score: -66.0   memory length: 4096   epsilon: 0.8763875375141412
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 1]
 [4 3]
 [1 3]
 [2 1]
 [4 0]
 [2 0]
 [0 4]
 [0 0]]  Selected action  [3 1]
episode: 43   score: -58.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[3 2]
 [4 0]
 [0 3]
 [4 1]
 [2 1]
 [1 4]
 [2 3]
 [0 4]
 [0 2]
 [1 3]
 [0 0]]  Selected action

episode: 43   score: -167.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[1 3]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 43   score: -172.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[1 4]
 [4 3]
 [0 0]]  Selected action  [0 0]
episode: 43   score: -177.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[2 3]
 [1 4]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 43   score: -182.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[2 0]
 [4 2]
 [0 2]
 [0 0]]  Selected action  [2 0]
episode: 43   score: -183.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[4 3]
 [1 0]
 [0 0]]  Selected action  [4 3]
episode: 43   score: -164.0   memory length: 4096   epsilon: 0.8763875375141412
Exploring Available actions  [[3 0]
 [1 0]
 [0 3]
 [2 4]
 [4 1]
 [3 1]
 [2 3]
 [0 1]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode

episode: 44   score: 131.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[2 0]
 [0 2]
 [2 4]
 [3 4]
 [0 4]
 [4 1]
 [3 0]
 [3 1]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 44   score: 126.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[4 3]
 [0 2]
 [2 1]
 [4 0]
 [0 3]
 [3 0]
 [0 0]]  Selected action  [4 3]
episode: 44   score: 123.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[1 2]
 [4 3]
 [4 1]
 [3 0]
 [2 0]
 [4 0]
 [3 4]
 [1 4]
 [0 4]
 [0 0]]  Selected action  [3 4]
episode: 44   score: 135.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[3 4]
 [4 0]
 [0 3]
 [3 2]
 [1 3]
 [3 0]
 [4 2]
 [0 0]]  Selected action  [3 2]
episode: 44   score: 144.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[3 2]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 44   score: 126.0   memory length: 4096   epsilon: 0.8736386

episode: 44   score: 159.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[3 1]
 [2 4]
 [2 1]
 [0 4]
 [3 0]
 [0 0]]  Selected action  [0 4]
episode: 44   score: 159.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[3 4]
 [2 1]
 [1 3]
 [2 0]
 [1 4]
 [2 4]
 [0 3]
 [3 0]
 [0 4]
 [0 1]
 [1 2]
 [0 2]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 44   score: 178.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[1 0]
 [1 2]
 [2 0]
 [0 0]]  Selected action  [2 0]
episode: 44   score: 198.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[4 3]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 44   score: 195.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[2 4]
 [4 0]
 [2 1]
 [1 3]
 [3 2]
 [1 2]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [3 0]
episode: 44   score: 199.0   memory length: 4096   epsilon: 0.8736386051383735
Expl

episode: 44   score: 90.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[4 0]
 [1 4]
 [0 4]
 [4 2]
 [4 1]
 [3 4]
 [1 0]
 [2 1]
 [1 3]
 [0 0]]  Selected action  [1 0]
episode: 44   score: 92.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[3 1]
 [3 4]
 [3 0]
 [0 0]]  Selected action  [3 1]
episode: 44   score: 56.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[4 0]
 [0 1]
 [2 4]
 [3 0]
 [2 3]
 [4 3]
 [3 2]
 [2 0]
 [1 3]
 [1 2]
 [3 1]
 [1 0]
 [2 1]
 [3 4]
 [0 2]
 [0 0]]  Selected action  [0 0]
episode: 44   score: 51.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[3 1]
 [0 4]
 [4 0]
 [0 2]
 [1 4]
 [3 0]
 [1 0]
 [3 4]
 [4 1]
 [4 3]
 [3 2]
 [2 3]
 [4 2]
 [2 4]
 [1 3]
 [0 0]]  Selected action  [4 1]
episode: 44   score: 51.0   memory length: 4096   epsilon: 0.8736386051383735
Exploring Available actions  [[0 3]
 [1 2]
 [1 4]
 [4 2]
 [2 0]
 [0 4]
 

(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [3 4]
 [1 4]
 [0 4]
 [3 0]
 [0 0]]  Selected action  [3 2]
episode: 45   score: 44.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[2 4]
 [3 1]
 [4 3]
 [0 4]
 [3 2]
 [3 0]
 [0 0]]  Selected action  [2 4]
episode: 45   score: 48.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[1 0]
 [4 3]
 [3 1]
 [2 3]
 [2 0]
 [0 0]]  Selected action  [2 3]
episode: 45   score: 67.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[3 2]
 [1 4]
 [0 3]
 [0 4]
 [4 3]
 [1 2]
 [3 1]
 [4 1]
 [3 4]
 [2 3]
 [1 0]
 [0 0]]  Selected action  [1 4]
episode: 45   score: 62.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[4 3]
 [4 1]
 [3 0]
 [1 2]
 [1 0]
 [0 4]
 [0 0]]  Selected action  [1 0]
episode: 45   score: 70.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[4

episode: 45   score: -154.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[0 3]
 [0 1]
 [1 2]
 [4 2]
 [4 3]
 [3 1]
 [2 1]
 [0 0]]  Selected action  [0 3]
episode: 45   score: -172.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[3 4]
 [4 0]
 [1 3]
 [4 1]
 [0 2]
 [0 1]
 [4 2]
 [3 2]
 [3 0]
 [2 1]
 [0 0]]  Selected action  [3 0]
episode: 45   score: -172.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[0 1]
 [1 4]
 [0 0]]  Selected action  [0 0]
episode: 45   score: -177.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[0 4]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 45   score: -187.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Available actions  [[4 1]
 [3 2]
 [0 2]
 [1 0]
 [3 1]
 [0 1]
 [1 4]
 [0 3]
 [0 0]]  Selected action  [0 2]
episode: 45   score: -175.0   memory length: 4096   epsilon: 0.8708982952371197
Exploring Av

episode: 45   score: -252.0   memory length: 4096   epsilon: 0.8708982952371197
Episode : 46
Exploring Available actions  [[4 2]
 [2 0]
 [3 4]
 [2 3]
 [0 3]
 [1 0]
 [0 1]
 [0 2]
 [0 4]
 [0 0]]  Selected action  [2 0]
episode: 46   score: -8.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[2 3]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 46   score: -13.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[4 3]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 46   score: -14.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 0]
episode: 46   score: -19.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[4 2]
 [4 3]
 [3 2]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 46   score: -24.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [0 0]
episode: 46   score

episode: 46   score: 98.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[0 4]
 [0 2]
 [1 2]
 [3 0]
 [3 4]
 [1 0]
 [4 3]
 [4 1]
 [3 2]
 [2 1]
 [4 0]
 [3 1]
 [0 1]
 [0 0]]  Selected action  [0 1]
episode: 46   score: 63.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[2 3]
 [1 2]
 [1 0]
 [4 0]
 [4 2]
 [0 1]
 [0 3]
 [3 0]
 [4 3]
 [0 4]
 [0 2]
 [2 1]
 [2 0]
 [0 0]]  Selected action  [2 1]
episode: 46   score: 62.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[1 2]
 [2 3]
 [1 0]
 [0 4]
 [4 2]
 [4 3]
 [4 1]
 [3 0]
 [3 1]
 [0 2]
 [2 0]
 [2 4]
 [1 4]
 [3 4]
 [4 0]
 [0 0]]  Selected action  [1 0]
episode: 46   score: 78.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [2 4]
episode: 46   score: 59.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[4 3]
 [4 2]
 [3 1]
 [0 4]
 [4 0]
 [1 4]
 [2 0]

episode: 46   score: 65.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[0 2]
 [3 4]
 [1 0]
 [4 3]
 [4 0]
 [0 0]]  Selected action  [1 0]
episode: 46   score: 81.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[3 2]
 [1 3]
 [2 0]
 [4 0]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 46   score: 56.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[0 3]
 [2 1]
 [0 1]
 [0 4]
 [3 0]
 [2 4]
 [4 3]
 [0 0]]  Selected action  [3 0]
episode: 46   score: 68.0   memory length: 4096   epsilon: 0.868166580764583
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [2 1]
 [1 0]
 [0 0]]  Selected action  [0 4]
episode: 46   score: 72.0   memory length: 4096   epsilon: 0.868166580764583
Exploring Available actions  [[1 4]
 [1 0]
 [1 3]
 [2 1]
 [4 1]
 [4 2]
 [3 4]
 [3 1]
 [3 0]
 [2 3]
 [0 0]]  Selected action  [4 1]
episode: 46   score: 84.0   memory length: 4096   epsilon: 0.868

episode: 47   score: -36.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[4 0]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 47   score: -59.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[4 0]
 [0 2]
 [0 4]
 [2 4]
 [4 3]
 [0 3]
 [0 0]]  Selected action  [4 0]
episode: 47   score: -53.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[0 1]
 [0 3]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 47   score: -81.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[3 2]
 [2 3]
 [0 1]
 [4 2]
 [0 0]]  Selected action  [4 2]
episode: 47   score: -83.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[1 3]
 [2 1]
 [1 2]
 [2 3]
 [0 1]
 [1 0]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 47   score: -88.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[3 2]
 [2 4]
 [3 0]
 [3 1]
 [0 0]]  S

episode: 47   score: -128.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[4 2]
 [4 0]
 [3 1]
 [2 3]
 [2 4]
 [2 1]
 [4 3]
 [0 4]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 47   score: -133.0   memory length: 4096   epsilon: 0.8654434347597999
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [0 3]
 [1 3]
 [2 4]
 [2 0]
 [0 2]
 [2 3]
 [0 0]]  Selected action  [4 0]
episode: 47   score: -159.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[2 3]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 47   score: -164.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[0 3]
 [1 4]
 [0 0]]  Selected action  [0 3]
episode: 47   score: -140.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[4 3]
 [2 3]
 [3 4]
 [3 1]
 [4 2]
 [0 0]]  Selected action  [4 2]
episode: 47   score: -167.0   memory length: 4096   epsilon: 0.865443434

episode: 47   score: -293.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[2 1]
 [2 0]
 [3 0]
 [3 2]
 [2 4]
 [0 4]
 [0 0]]  Selected action  [2 4]
episode: 47   score: -300.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[3 4]
 [0 3]
 [1 3]
 [4 2]
 [3 1]
 [3 0]
 [0 0]]  Selected action  [1 3]
episode: 47   score: -314.0   memory length: 4096   epsilon: 0.8654434347597999
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [2 0]
 [3 4]
 [2 1]
 [2 3]
 [0 0]]  Selected action  [1 0]
episode: 47   score: -326.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[0 3]
 [1 4]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 47   score: -331.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Available actions  [[3 4]
 [3 1]
 [0 1]
 [0 0]]  Selected action  [0 1]
episode: 47   score: -323.0   memory length: 4096   epsilon: 0.8654434347597999
Exploring Av

episode: 48   score: -90.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[3 2]
 [1 2]
 [4 0]
 [0 3]
 [3 0]
 [0 4]
 [2 3]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 48   score: -92.0   memory length: 4096   epsilon: 0.8627288303463745
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [2 0]
 [1 4]
 [0 1]
 [0 0]]  Selected action  [0 2]
episode: 48   score: -77.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[0 1]
 [2 3]
 [1 4]
 [4 3]
 [2 4]
 [0 0]]  Selected action  [2 3]
episode: 48   score: -41.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[1 4]
 [1 0]
 [2 4]
 [3 1]
 [1 3]
 [0 0]]  Selected action  [1 4]
episode: 48   score: -33.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[2 4]
 [2 0]
 [4 2]
 [3 4]
 [2 1]
 [1 4]
 [0 2]
 [4 0]
 [3 1]
 [1 2]
 [0 4]
 [0 0]]  Selected action  [4 2]
episode: 48   score: -33.0   m

episode: 48   score: -47.0   memory length: 4096   epsilon: 0.8627288303463745
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [3 2]
 [2 0]
 [1 0]
 [4 2]
 [0 0]]  Selected action  [0 1]
episode: 48   score: -24.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[1 0]
 [4 0]
 [2 0]
 [0 4]
 [1 2]
 [1 4]
 [0 3]
 [2 4]
 [1 3]
 [3 2]
 [4 2]
 [4 3]
 [0 0]]  Selected action  [0 3]
episode: 48   score: -52.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[2 3]
 [3 4]
 [0 4]
 [4 0]
 [3 0]
 [1 3]
 [0 0]]  Selected action  [0 4]
episode: 48   score: -45.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[0 3]
 [1 4]
 [2 1]
 [4 2]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 48   score: -39.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[3 4]
 [0 4]
 [2 4]
 [1 4]
 [1 2]
 [0 2]
 [4 3]
 [3 1]
 [3 2]
 [4 2]
 [0 1]
 [2 1]
 [4 1]
 [0

episode: 48   score: -104.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[0 3]
 [4 0]
 [4 2]
 [1 4]
 [3 1]
 [1 0]
 [2 1]
 [0 0]]  Selected action  [3 1]
episode: 48   score: -94.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[4 1]
 [4 2]
 [4 3]
 [4 0]
 [3 0]
 [3 2]
 [0 3]
 [3 4]
 [0 2]
 [1 2]
 [2 0]
 [2 4]
 [1 0]
 [0 0]]  Selected action  [2 0]
episode: 48   score: -95.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[3 0]
 [1 4]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 48   score: -113.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[2 1]
 [0 1]
 [2 4]
 [1 3]
 [0 4]
 [2 0]
 [4 3]
 [1 0]
 [4 0]
 [3 4]
 [4 1]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 48   score: -118.0   memory length: 4096   epsilon: 0.8627288303463745
Exploring Available actions  [[4 0]
 [3 1]
 [3 2]
 [0 1]
 [4 1]
 [0 3]
 [0 4]
 [0 0]]  Selected action  [4 1]
epi

episode: 49   score: -253.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[3 2]
 [0 2]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 49   score: -258.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[3 1]
 [0 2]
 [4 1]
 [2 4]
 [3 4]
 [2 1]
 [2 3]
 [4 0]
 [3 2]
 [0 0]]  Selected action  [3 4]
episode: 49   score: -264.0   memory length: 4096   epsilon: 0.8600227407322133
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [1 4]
 [2 1]
 [3 4]
 [0 4]
 [2 3]
 [4 3]
 [0 0]]  Selected action  [2 4]
episode: 49   score: -273.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[1 2]
 [0 1]
 [2 4]
 [4 0]
 [0 3]
 [4 1]
 [0 0]]  Selected action  [4 0]
episode: 49   score: -261.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[4 3]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 49   score: -261.0   memory length: 4096   epsilo

episode: 49   score: -372.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[4 2]
 [4 0]
 [2 1]
 [0 0]]  Selected action  [4 0]
episode: 49   score: -373.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[2 1]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 49   score: -366.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[2 0]
 [2 3]
 [1 3]
 [0 0]]  Selected action  [2 0]
episode: 49   score: -392.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 0]
episode: 49   score: -397.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[3 0]
 [3 4]
 [0 1]
 [4 0]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 49   score: -402.0   memory length: 4096   epsilon: 0.8600227407322133
Exploring Available actions  [[1 2]
 [0 3]
 [0 0]]  Selected action  [1 2]
episode: 49   score: -404.0   memory lengt

episode: 50   score: -17.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[0 3]
 [4 2]
 [2 1]
 [0 4]
 [4 0]
 [0 0]]  Selected action  [0 3]
episode: 50   score: -35.0   memory length: 4096   epsilon: 0.8573251392092601
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [2 0]
 [0 2]
 [2 3]
 [0 4]
 [1 4]
 [0 0]]  Selected action  [3 2]
episode: 50   score: -3.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[4 2]
 [0 0]]  Selected action  [0 0]
episode: 50   score: -8.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[0 1]
 [4 3]
 [3 4]
 [0 2]
 [2 3]
 [3 0]
 [2 4]
 [0 0]]  Selected action  [3 4]
episode: 50   score: -6.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[4 1]
 [2 0]
 [3 2]
 [1 4]
 [4 3]
 [2 1]
 [3 0]
 [0 3]
 [4 2]
 [1 0]
 [4 0]
 [0 0]]  Selected action  [3 0]
episode: 50   score: 24.0   memory length: 4096 

episode: 50   score: 74.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[4 3]
 [0 0]]  Selected action  [4 3]
episode: 50   score: 76.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[2 4]
 [3 0]
 [1 3]
 [2 0]
 [1 4]
 [0 2]
 [4 3]
 [4 1]
 [2 1]
 [3 4]
 [0 0]]  Selected action  [4 1]
episode: 50   score: 46.0   memory length: 4096   epsilon: 0.8573251392092601
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [0 2]
 [4 2]
 [1 0]
 [4 3]
 [0 4]
 [4 0]
 [2 3]
 [3 1]
 [0 0]]  Selected action  [4 1]
episode: 50   score: 46.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[4 2]
 [4 1]
 [2 3]
 [3 2]
 [2 4]
 [0 4]
 [4 0]
 [0 1]
 [4 3]
 [1 4]
 [3 1]
 [0 3]
 [1 0]
 [2 0]
 [3 4]
 [0 0]]  Selected action  [4 2]
episode: 50   score: 46.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[4 1]
 [1 0]
 [1 4]
 [0 0]]  Selected

episode: 50   score: -133.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[4 1]
 [2 1]
 [0 4]
 [3 4]
 [3 0]
 [4 0]
 [0 0]]  Selected action  [3 4]
episode: 50   score: -138.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[1 4]
 [0 1]
 [4 2]
 [3 2]
 [2 3]
 [0 4]
 [1 0]
 [1 3]
 [0 0]]  Selected action  [1 4]
episode: 50   score: -148.0   memory length: 4096   epsilon: 0.8573251392092601
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 2]
 [2 0]
 [4 2]
 [1 0]
 [2 3]
 [0 2]
 [4 0]
 [2 4]
 [0 1]
 [3 4]
 [0 0]]  Selected action  [1 2]
episode: 50   score: -136.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[1 3]
 [3 2]
 [4 3]
 [3 4]
 [3 0]
 [1 0]
 [0 3]
 [2 1]
 [1 4]
 [3 1]
 [4 0]
 [0 2]
 [0 4]
 [0 0]]  Selected action  [0 2]
episode: 50   score: -143.0   memory length: 4096   epsilon: 0.8573251392092601
Exploring Available actions  [[4 1]
 [0 

episode: 51   score: 5.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[1 2]
 [0 0]]  Selected action  [0 0]
episode: 51   score: 0.0   memory length: 4096   epsilon: 0.8546359991532334
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [0 0]]  Selected action  [0 4]
episode: 51   score: 4.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[4 1]
 [0 4]
 [3 0]
 [2 4]
 [1 2]
 [1 3]
 [3 2]
 [4 3]
 [0 1]
 [2 1]
 [0 0]]  Selected action  [3 2]
episode: 51   score: -8.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[1 2]
 [1 3]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [2 1]
episode: 51   score: 0.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[2 3]
 [0 2]
 [4 1]
 [3 2]
 [3 4]
 [3 1]
 [4 3]
 [0 1]
 [4 0]
 [2 4]
 [3 0]
 [1 4]
 [0 4]
 [0 0]]  Selected action  [3 4]
episode: 51   score: 14.0   memory length: 4096   epsilon: 0.854635999153233

episode: 51   score: -52.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[1 3]
 [3 4]
 [0 3]
 [2 4]
 [0 0]]  Selected action  [1 3]
episode: 51   score: -36.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[1 0]
 [0 2]
 [2 4]
 [4 3]
 [3 4]
 [0 4]
 [0 0]]  Selected action  [2 4]
episode: 51   score: -36.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[1 2]
 [1 3]
 [4 2]
 [3 1]
 [3 4]
 [0 2]
 [2 4]
 [4 1]
 [2 1]
 [0 0]]  Selected action  [0 2]
episode: 51   score: -54.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[0 2]
 [0 3]
 [3 1]
 [2 4]
 [4 1]
 [1 2]
 [4 3]
 [1 3]
 [0 0]]  Selected action  [3 1]
episode: 51   score: -72.0   memory length: 4096   epsilon: 0.8546359991532334
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [3 4]
 [4 2]
 [4 3]
 [1 3]
 [2 4]
 [0 1]
 [3 1]
 [4 1]
 [4 0]
 [1 2]
 [0 0]

episode: 51   score: -223.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[3 1]
 [0 0]]  Selected action  [3 1]
episode: 51   score: -229.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[0 1]
 [0 2]
 [0 3]
 [2 1]
 [1 3]
 [3 4]
 [0 4]
 [2 4]
 [2 0]
 [4 0]
 [1 0]
 [3 1]
 [4 3]
 [0 0]]  Selected action  [2 0]
episode: 51   score: -226.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[4 3]
 [0 3]
 [0 2]
 [4 2]
 [3 4]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 51   score: -231.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 51   score: -236.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 51   score: -220.0   memory length: 4096   epsilon: 0.8546359991532334
Exploring Available actions  [[3 0]
 [2 1]
 [0 3]
 [2 0]
 [3 

episode: 52   score: 101.0   memory length: 4096   epsilon: 0.8519552940233627
Exploring Available actions  [[1 0]
 [4 0]
 [2 1]
 [0 2]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 52   score: 109.0   memory length: 4096   epsilon: 0.8519552940233627
Exploring Available actions  [[1 2]
 [4 0]
 [4 3]
 [1 3]
 [3 4]
 [2 4]
 [2 3]
 [0 0]]  Selected action  [1 2]
episode: 52   score: 122.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [0 0]]  Selected action  [4 2]
episode: 52   score: 132.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [4 1]
 [4 0]
 [1 2]
 [4 2]
 [2 4]
 [0 3]
 [0 0]]  Selected action  [0 4]
episode: 52   score: 102.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 0]
 [0 3]
 [4 2]
 [1 3]
 [1 2]
 [0 0]]  Selected action  [2 0]
episo

episode: 52   score: 79.0   memory length: 4096   epsilon: 0.8519552940233627
Exploring Available actions  [[2 4]
 [4 3]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 52   score: 79.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [4 2]
 [2 4]
 [3 0]
 [1 4]
 [1 3]
 [4 1]
 [2 1]
 [3 1]
 [0 2]
 [0 0]]  Selected action  [0 1]
episode: 52   score: 77.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [1 4]
 [2 0]
 [3 1]
 [0 1]
 [3 2]
 [3 0]
 [2 1]
 [0 3]
 [4 3]
 [1 2]
 [4 0]
 [2 4]
 [0 2]
 [4 1]
 [0 0]]  Selected action  [1 0]
episode: 52   score: 85.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [0 3]
 [3 0]
 [0 1]
 [0 0]]  Selected action  [0 2]
episode: 52   score: 105.0   memory length: 4096   e

episode: 52   score: -41.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [2 3]
 [4 1]
 [0 0]]  Selected action  [3 2]
episode: 52   score: -44.0   memory length: 4096   epsilon: 0.8519552940233627
Exploring Available actions  [[2 4]
 [1 3]
 [3 0]
 [3 2]
 [2 0]
 [4 2]
 [4 3]
 [0 0]]  Selected action  [3 2]
episode: 52   score: -52.0   memory length: 4096   epsilon: 0.8519552940233627
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [2 1]
 [2 3]
 [4 2]
 [4 3]
 [1 3]
 [0 0]]  Selected action  [0 2]
episode: 52   score: -53.0   memory length: 4096   epsilon: 0.8519552940233627
Exploring Available actions  [[1 3]
 [0 0]]  Selected action  [0 0]
episode: 52   score: -58.0   memory length: 4096   epsilon: 0.8519552940233627
Exploring Available actions  [[1 3]
 [4 3]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 52   score: -63.0   memory length: 4096   epsilon: 0.8519552940233627


episode: 53   score: -80.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[1 3]
 [2 3]
 [4 1]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 53   score: -100.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[0 2]
 [1 4]
 [4 3]
 [3 2]
 [0 3]
 [4 0]
 [2 0]
 [1 2]
 [4 1]
 [0 4]
 [0 0]]  Selected action  [0 4]
episode: 53   score: -126.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[2 0]
 [2 1]
 [4 1]
 [0 2]
 [2 4]
 [3 0]
 [3 4]
 [1 4]
 [3 2]
 [2 3]
 [0 0]]  Selected action  [0 2]
episode: 53   score: -119.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [2 4]
episode: 53   score: -87.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[0 3]
 [2 4]
 [4 0]
 [4 2]
 [2 3]
 [4 1]
 [1 2]
 [0 2]
 [2 0]
 [0 0]]  Selected action  [2 4]
episode: 53   score: -89.0   memory length: 4096   epsilon:

episode: 53   score: -181.0   memory length: 4096   epsilon: 0.8492829973621271
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [1 4]
 [0 2]
 [0 4]
 [3 4]
 [0 1]
 [1 2]
 [3 2]
 [2 4]
 [4 3]
 [0 0]]  Selected action  [4 0]
episode: 53   score: -197.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[4 0]
 [4 1]
 [0 0]]  Selected action  [4 0]
episode: 53   score: -198.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[2 1]
 [4 3]
 [0 0]]  Selected action  [0 0]
episode: 53   score: -203.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [3 0]
episode: 53   score: -206.0   memory length: 4096   epsilon: 0.8492829973621271
Exploring Available actions  [[2 0]
 [4 1]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 53   score: -211.0   memory length: 4096   epsilon: 0.8492829973621271
(1, 21) [0. 0.] Index  0 0.0
Exp

episode: 54   score: -19.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[4 3]
 [3 4]
 [2 0]
 [3 1]
 [1 4]
 [4 1]
 [2 4]
 [0 2]
 [0 1]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 54   score: -24.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[0 4]
 [3 1]
 [1 3]
 [0 1]
 [3 4]
 [4 0]
 [3 0]
 [2 1]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 54   score: -17.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[0 4]
 [1 0]
 [0 3]
 [3 0]
 [3 2]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 54   score: -32.0   memory length: 4096   epsilon: 0.846619082794994
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 0]
 [3 2]
 [2 3]
 [3 4]
 [4 2]
 [4 3]
 [1 4]
 [0 1]
 [2 4]
 [0 0]]  Selected action  [2 0]
episode: 54   score: -8.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[4 3]
 [1 4]
 [3 2]
 [4 2]
 [2 3]
 [0 0]]  Se

episode: 54   score: 102.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[0 2]
 [2 4]
 [1 4]
 [0 4]
 [0 3]
 [4 0]
 [2 0]
 [0 0]]  Selected action  [0 2]
episode: 54   score: 87.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[0 3]
 [4 2]
 [4 0]
 [0 0]]  Selected action  [4 2]
episode: 54   score: 85.0   memory length: 4096   epsilon: 0.846619082794994
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [4 3]
 [1 2]
 [0 0]]  Selected action  [3 2]
episode: 54   score: 99.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[2 1]
 [0 4]
 [2 4]
 [1 4]
 [0 0]]  Selected action  [2 4]
episode: 54   score: 131.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[0 2]
 [3 2]
 [4 1]
 [4 0]
 [2 3]
 [0 0]]  Selected action  [4 0]
episode: 54   score: 135.0   memory length: 4096   epsilon: 0.846619082794994
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0

episode: 54   score: 101.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[0 4]
 [2 0]
 [2 3]
 [1 2]
 [2 4]
 [3 0]
 [3 2]
 [4 0]
 [3 4]
 [0 1]
 [0 0]]  Selected action  [2 3]
episode: 54   score: 96.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[2 4]
 [0 1]
 [4 3]
 [1 3]
 [2 3]
 [1 4]
 [2 0]
 [1 2]
 [0 0]]  Selected action  [2 4]
episode: 54   score: 66.0   memory length: 4096   epsilon: 0.846619082794994
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 4]
 [2 1]
 [1 2]
 [2 0]
 [4 1]
 [2 4]
 [0 0]]  Selected action  [3 4]
episode: 54   score: 48.0   memory length: 4096   epsilon: 0.846619082794994
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [2 1]
 [3 0]
 [3 2]
 [1 4]
 [1 2]
 [0 2]
 [3 4]
 [4 1]
 [0 0]]  Selected action  [0 3]
episode: 54   score: 52.0   memory length: 4096   epsilon: 0.846619082794994
Exploring Available actions  [[0 3]
 [3 

episode: 55   score: 23.0   memory length: 4096   epsilon: 0.843963524030159
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [0 0]]  Selected action  [2 1]
episode: 55   score: 48.0   memory length: 4096   epsilon: 0.843963524030159
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 0]
 [0 1]
 [2 1]
 [1 0]
 [2 4]
 [0 3]
 [3 4]
 [1 2]
 [4 3]
 [4 2]
 [3 2]
 [0 0]]  Selected action  [3 0]
episode: 55   score: 40.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[2 1]
 [0 1]
 [2 4]
 [3 0]
 [0 0]]  Selected action  [3 0]
episode: 55   score: 9.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[1 2]
 [2 4]
 [1 3]
 [3 2]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 55   score: -23.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[3 2]
 [3 1]
 [0 2]
 [3 0]
 [4 3]
 [1 4]
 [4 2]
 [4 0]
 [0 4]
 [3 4]
 [0 1]
 [0 0]]  Selected action  [1

episode: 55   score: -57.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[0 1]
 [2 3]
 [4 1]
 [3 1]
 [1 0]
 [4 0]
 [2 0]
 [0 0]]  Selected action  [2 3]
episode: 55   score: -65.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[2 1]
 [0 1]
 [3 1]
 [1 2]
 [0 3]
 [0 0]]  Selected action  [0 1]
episode: 55   score: -81.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[1 0]
 [4 1]
 [0 3]
 [1 2]
 [1 3]
 [2 1]
 [3 4]
 [0 2]
 [3 0]
 [2 4]
 [3 2]
 [0 0]]  Selected action  [3 0]
episode: 55   score: -78.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[0 1]
 [1 3]
 [1 4]
 [0 0]]  Selected action  [1 3]
episode: 55   score: -96.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[1 2]
 [1 3]
 [2 1]
 [2 3]
 [0 1]
 [1 0]
 [4 0]
 [3 4]
 [0 0]]  Selected action  [2 3]
episode: 55   score: -97.0   memory length: 4096   epsilon: 0.84396

episode: 55   score: -60.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 55   score: -78.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[4 1]
 [3 2]
 [3 4]
 [1 0]
 [4 0]
 [0 4]
 [2 4]
 [0 2]
 [0 3]
 [3 0]
 [1 2]
 [0 0]]  Selected action  [3 2]
episode: 55   score: -68.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[1 3]
 [3 1]
 [0 4]
 [2 1]
 [0 0]]  Selected action  [3 1]
episode: 55   score: -75.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[0 3]
 [2 4]
 [2 3]
 [3 1]
 [2 0]
 [4 0]
 [1 2]
 [0 0]]  Selected action  [4 0]
episode: 55   score: -59.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[0 4]
 [0 0]]  Selected action  [0 4]
episode: 55   score: -43.0   memory length: 4096   epsilon: 0.843963524030159
Exploring Available actions  [[0 3]
 [3 4]
 [0 2]
 [1 0]
 [

episode: 56   score: -70.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[3 1]
 [0 4]
 [3 4]
 [1 3]
 [1 2]
 [0 2]
 [4 3]
 [3 2]
 [2 0]
 [4 1]
 [0 0]]  Selected action  [0 2]
episode: 56   score: -85.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[3 2]
 [3 1]
 [1 0]
 [4 2]
 [2 0]
 [0 3]
 [0 0]]  Selected action  [4 2]
episode: 56   score: -87.0   memory length: 4096   epsilon: 0.8413162948582857
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [2 4]
 [1 0]
 [0 0]]  Selected action  [4 3]
episode: 56   score: -81.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[0 1]
 [4 2]
 [2 1]
 [3 2]
 [0 2]
 [2 4]
 [3 4]
 [2 0]
 [3 1]
 [0 0]]  Selected action  [2 1]
episode: 56   score: -75.0   memory length: 4096   epsilon: 0.8413162948582857
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [3 1]
 [1 2]
 [2 3]
 [2 1]

episode: 56   score: -20.0   memory length: 4096   epsilon: 0.8413162948582857
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [3 1]
 [2 3]
 [3 4]
 [3 0]
 [2 0]
 [4 3]
 [0 0]]  Selected action  [0 3]
episode: 56   score: -38.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[4 0]
 [2 1]
 [3 0]
 [0 1]
 [1 4]
 [4 2]
 [3 1]
 [0 0]]  Selected action  [0 0]
episode: 56   score: -43.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[0 1]
 [4 2]
 [4 1]
 [4 0]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 56   score: -46.0   memory length: 4096   epsilon: 0.8413162948582857
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [2 0]
 [1 4]
 [0 1]
 [1 0]
 [0 4]
 [0 0]]  Selected action  [1 3]
episode: 56   score: -53.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[4 1]
 [4 3]
 [3 0]
 [4 2]
 [3 2]
 [0 0]]  Selected acti

episode: 56   score: 9.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[2 1]
 [1 3]
 [1 2]
 [2 0]
 [2 3]
 [3 0]
 [4 2]
 [1 0]
 [4 1]
 [4 0]
 [0 0]]  Selected action  [4 1]
episode: 56   score: 10.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[0 3]
 [4 1]
 [1 2]
 [3 2]
 [0 4]
 [1 3]
 [3 1]
 [1 4]
 [3 0]
 [2 3]
 [0 1]
 [3 4]
 [2 0]
 [1 0]
 [2 4]
 [0 0]]  Selected action  [3 1]
episode: 56   score: -11.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[1 0]
 [4 2]
 [4 0]
 [3 4]
 [0 3]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 56   score: 29.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[1 0]
 [4 2]
 [0 0]]  Selected action  [4 2]
episode: 56   score: 29.0   memory length: 4096   epsilon: 0.8413162948582857
Exploring Available actions  [[0 3]
 [0 1]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 56   score: 24.0   memory length: 4096   eps

episode: 57   score: 26.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[3 0]
 [0 2]
 [4 2]
 [0 4]
 [1 0]
 [0 3]
 [4 0]
 [3 1]
 [1 3]
 [2 1]
 [2 4]
 [2 3]
 [1 4]
 [0 0]]  Selected action  [0 3]
episode: 57   score: 8.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[3 0]
 [4 0]
 [4 3]
 [1 3]
 [1 2]
 [0 4]
 [3 4]
 [0 0]]  Selected action  [1 3]
episode: 57   score: -12.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[0 4]
 [2 1]
 [0 2]
 [3 1]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 57   score: -44.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[0 2]
 [4 0]
 [3 0]
 [1 2]
 [4 2]
 [4 1]
 [0 1]
 [0 0]]  Selected action  [3 0]
episode: 57   score: -14.0   memory length: 4096   epsilon: 0.8386773691522483
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 4]
 [2 3]
 [0 0]]  Selected action  [1 4]
episode: 57   score: -34.0   mem

episode: 57   score: -90.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[3 4]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 57   score: -103.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[1 4]
 [1 3]
 [3 2]
 [3 1]
 [3 0]
 [2 4]
 [0 3]
 [0 2]
 [1 2]
 [2 3]
 [4 0]
 [0 0]]  Selected action  [3 0]
episode: 57   score: -79.0   memory length: 4096   epsilon: 0.8386773691522483
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [3 4]
 [3 1]
 [1 2]
 [0 0]]  Selected action  [4 1]
episode: 57   score: -79.0   memory length: 4096   epsilon: 0.8386773691522483
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [2 3]
 [0 2]
 [0 1]
 [3 0]
 [3 2]
 [0 4]
 [4 0]
 [3 1]
 [1 2]
 [4 3]
 [1 4]
 [4 2]
 [2 1]
 [0 0]]  Selected action  [0 3]
episode: 57   score: -87.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[0 2]

episode: 57   score: -26.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[0 4]
 [2 1]
 [3 2]
 [0 1]
 [0 2]
 [2 0]
 [0 0]]  Selected action  [0 1]
episode: 57   score: -28.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[2 0]
 [4 1]
 [2 4]
 [3 4]
 [4 2]
 [0 4]
 [0 3]
 [0 2]
 [1 2]
 [1 4]
 [3 2]
 [1 0]
 [0 0]]  Selected action  [0 3]
episode: 57   score: -14.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[3 0]
 [0 1]
 [2 4]
 [3 1]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 57   score: -19.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[1 3]
 [4 3]
 [3 4]
 [2 3]
 [0 3]
 [0 1]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 57   score: -1.0   memory length: 4096   epsilon: 0.8386773691522483
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 57   score: -6.0   memory length: 4096   epsilon: 0.8386773691522483
(1, 21

episode: 58   score: 15.0   memory length: 4096   epsilon: 0.8360467208668725
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [2 3]
 [1 3]
 [0 0]]  Selected action  [0 2]
episode: 58   score: 39.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[2 1]
 [0 1]
 [4 3]
 [2 0]
 [0 0]]  Selected action  [4 3]
episode: 58   score: 59.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[2 1]
 [1 3]
 [2 4]
 [3 2]
 [1 0]
 [3 0]
 [2 0]
 [4 2]
 [0 0]]  Selected action  [1 0]
episode: 58   score: 57.0   memory length: 4096   epsilon: 0.8360467208668725
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [0 0]]  Selected action  [4 3]
episode: 58   score: 45.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[2 0]
 [2 4]
 [0 3]
 [0 4]
 [3 2]
 [3 4]
 [4 1]
 [3 0]
 [0 0]]  Selected action  [0 3]
episode: 58   score: 37.0   memory length: 4096   epsilon: 0.8360467208

episode: 58   score: -95.0   memory length: 4096   epsilon: 0.8360467208668725
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [3 0]
 [2 1]
 [4 1]
 [3 2]
 [3 4]
 [1 0]
 [1 3]
 [1 2]
 [0 4]
 [3 1]
 [4 2]
 [2 3]
 [2 0]
 [0 0]]  Selected action  [0 3]
episode: 58   score: -97.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[2 3]
 [4 2]
 [2 1]
 [0 0]]  Selected action  [2 3]
episode: 58   score: -100.0   memory length: 4096   epsilon: 0.8360467208668725
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [4 0]
 [1 2]
 [0 0]]  Selected action  [4 1]
episode: 58   score: -125.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[0 4]
 [1 4]
 [0 2]
 [0 1]
 [3 2]
 [0 3]
 [2 4]
 [2 1]
 [3 4]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [0 1]
episode: 58   score: -160.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[3 2]


episode: 58   score: 91.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[2 1]
 [1 3]
 [0 0]]  Selected action  [2 1]
episode: 58   score: 95.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[4 2]
 [0 4]
 [2 0]
 [0 3]
 [0 2]
 [1 4]
 [3 0]
 [0 1]
 [3 2]
 [1 0]
 [1 2]
 [4 3]
 [1 3]
 [2 4]
 [2 3]
 [0 0]]  Selected action  [0 2]
episode: 58   score: 95.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[1 4]
 [4 3]
 [0 4]
 [0 3]
 [0 0]]  Selected action  [0 4]
episode: 58   score: 84.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[2 4]
 [4 0]
 [1 2]
 [3 0]
 [0 2]
 [0 1]
 [0 4]
 [4 2]
 [2 3]
 [4 1]
 [4 3]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 58   score: 79.0   memory length: 4096   epsilon: 0.8360467208668725
Exploring Available actions  [[4 1]
 [3 1]
 [4 3]
 [4 0]
 [2 0]
 [2 4]
 [1 0]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 58

episode: 59   score: -54.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[0 2]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 59   score: -63.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[1 3]
 [0 2]
 [4 2]
 [2 1]
 [3 0]
 [4 3]
 [0 1]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 59   score: -39.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [3 0]
episode: 59   score: -41.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[0 2]
 [1 0]
 [3 2]
 [4 0]
 [0 0]]  Selected action  [3 2]
episode: 59   score: -46.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[2 1]
 [3 4]
 [0 1]
 [1 4]
 [3 2]
 [0 4]
 [0 0]]  Selected action  [1 4]
episode: 59   score: -96.0   memory length: 4096   epsilon: 0.833424324038679
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Availab

episode: 59   score: -115.0   memory length: 4096   epsilon: 0.833424324038679
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 0]
 [1 0]
 [4 1]
 [2 4]
 [4 3]
 [1 2]
 [0 4]
 [0 0]]  Selected action  [3 0]
episode: 59   score: -115.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[0 4]
 [1 2]
 [0 2]
 [0 0]]  Selected action  [0 4]
episode: 59   score: -111.0   memory length: 4096   epsilon: 0.833424324038679
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [0 2]
 [4 2]
 [1 0]
 [0 4]
 [1 3]
 [0 3]
 [0 0]]  Selected action  [2 4]
episode: 59   score: -111.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[3 4]
 [1 4]
 [3 1]
 [0 2]
 [1 2]
 [2 1]
 [3 0]
 [2 3]
 [1 0]
 [0 0]]  Selected action  [1 2]
episode: 59   score: -113.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[4 3]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode:

(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [0 0]]  Selected action  [1 0]
episode: 59   score: 2.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[3 4]
 [0 0]]  Selected action  [3 4]
episode: 59   score: -11.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[4 1]
 [0 4]
 [0 2]
 [3 4]
 [2 3]
 [1 2]
 [4 0]
 [0 0]]  Selected action  [0 2]
episode: 59   score: 3.0   memory length: 4096   epsilon: 0.833424324038679
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[1 2]
 [0 0]]  Selected action  [1 2]
episode: 59   score: 30.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[1 3]
 [3 4]
 [4 3]
 [1 4]
 [0 0]]  Selected action  [4 3]
episode: 59   score: 37.0   memory length: 4096   epsilon: 0.833424324038679
Exploring Available actions  [[4 2]
 [1 4]
 [0 2]
 [0 0]]  Selected action  [0 0]
episode: 59   score: 32.0   memory length: 4096   epsilon: 0.8334243240

episode: 60   score: 17.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[1 4]
 [0 0]]  Selected action  [0 0]
episode: 60   score: 12.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[1 0]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 60   score: 7.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[0 1]
 [3 2]
 [1 2]
 [0 4]
 [0 0]]  Selected action  [0 1]
episode: 60   score: 19.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[3 4]
 [4 0]
 [1 4]
 [4 1]
 [4 2]
 [0 3]
 [2 1]
 [0 1]
 [3 0]
 [4 3]
 [0 4]
 [0 0]]  Selected action  [0 3]
episode: 60   score: 8.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[0 2]
 [2 3]
 [0 1]
 [3 2]
 [2 4]
 [1 2]
 [0 0]]  Selected action  [2 3]
episode: 60   score: 22.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[2 4]
 [3 2]
 [0 2]
 [2 0]
 [3 

episode: 60   score: -38.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[0 2]
 [3 0]
 [4 1]
 [2 4]
 [4 2]
 [4 0]
 [1 4]
 [2 3]
 [0 1]
 [3 4]
 [0 0]]  Selected action  [2 4]
episode: 60   score: -48.0   memory length: 4096   epsilon: 0.8308101527856273
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [0 2]
 [4 0]
 [3 1]
 [2 1]
 [0 4]
 [2 4]
 [0 3]
 [1 2]
 [1 0]
 [2 0]
 [0 0]]  Selected action  [1 3]
episode: 60   score: -58.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[3 0]
 [1 0]
 [0 2]
 [1 3]
 [3 2]
 [2 3]
 [1 2]
 [0 0]]  Selected action  [3 0]
episode: 60   score: -58.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[4 2]
 [0 0]]  Selected action  [4 2]
episode: 60   score: -55.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[3 4]
 [3 1]
 [0 2]
 [1 3]
 [4 1]
 [2 0]
 [2 1]
 [0 0]]  Selected ac

episode: 60   score: -72.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[2 4]
 [1 0]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 60   score: -71.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[2 4]
 [1 2]
 [4 2]
 [0 2]
 [2 0]
 [3 1]
 [3 2]
 [0 1]
 [2 1]
 [2 3]
 [0 0]]  Selected action  [1 2]
episode: 60   score: -102.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[0 1]
 [3 4]
 [4 0]
 [2 1]
 [0 0]]  Selected action  [2 1]
episode: 60   score: -98.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[3 0]
 [4 3]
 [3 2]
 [3 1]
 [1 4]
 [4 2]
 [0 3]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 60   score: -113.0   memory length: 4096   epsilon: 0.8308101527856273
Exploring Available actions  [[4 3]
 [2 3]
 [1 2]
 [0 4]
 [3 2]
 [2 0]
 [0 3]
 [2 1]
 [0 2]
 [3 1]
 [0 0]]  Selected action  [4 3]
episode: 60   score: -116.0   memory length: 4096   e

episode: 61   score: -4.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [3 2]
episode: 61   score: 17.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[3 0]
 [2 1]
 [0 1]
 [0 0]]  Selected action  [2 1]
episode: 61   score: 33.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[2 0]
 [3 1]
 [3 0]
 [2 3]
 [4 2]
 [2 1]
 [1 0]
 [2 4]
 [1 2]
 [0 0]]  Selected action  [4 2]
episode: 61   score: 23.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[2 4]
 [2 0]
 [3 1]
 [1 4]
 [0 1]
 [0 0]]  Selected action  [1 4]
episode: 61   score: 21.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[2 3]
 [2 4]
 [3 4]
 [4 0]
 [0 1]
 [1 4]
 [3 0]
 [3 2]
 [2 1]
 [0 0]]  Selected action  [2 4]
episode: 61   score: 19.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[2 0]
 [3 2]
 [4 1]


episode: 61   score: -245.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[3 4]
 [2 4]
 [4 3]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 61   score: -250.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[4 1]
 [2 0]
 [3 2]
 [0 1]
 [3 0]
 [1 0]
 [4 0]
 [2 4]
 [0 0]]  Selected action  [0 1]
episode: 61   score: -258.0   memory length: 4096   epsilon: 0.82820418130686
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [4 1]
 [2 0]
 [3 4]
 [0 4]
 [3 2]
 [1 2]
 [4 0]
 [2 3]
 [1 0]
 [0 0]]  Selected action  [2 4]
episode: 61   score: -276.0   memory length: 4096   epsilon: 0.82820418130686
Exploring Available actions  [[3 4]
 [2 4]
 [1 3]
 [0 2]
 [3 0]
 [0 4]
 [4 1]
 [0 3]
 [0 0]]  Selected action  [1 3]
episode: 61   score: -268.0   memory length: 4096   epsilon: 0.82820418130686
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [0 4]
 [3 2]
 [0 1]


episode: 62   score: -8.0   memory length: 4096   epsilon: 0.8256063838824484
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [2 1]
 [0 1]
 [3 4]
 [2 0]
 [3 1]
 [0 0]]  Selected action  [0 2]
episode: 62   score: 1.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[3 4]
 [0 1]
 [0 2]
 [2 3]
 [0 0]]  Selected action  [3 4]
episode: 62   score: -24.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[1 3]
 [2 1]
 [0 1]
 [2 3]
 [3 1]
 [0 0]]  Selected action  [0 1]
episode: 62   score: -1.0   memory length: 4096   epsilon: 0.8256063838824484
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [4 3]
 [4 2]
 [1 0]
 [1 4]
 [1 3]
 [2 0]
 [4 1]
 [1 2]
 [3 4]
 [0 2]
 [2 3]
 [0 0]]  Selected action  [0 1]
episode: 62   score: -24.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[2 0]
 [2 3]
 [3 1]
 [1 0]
 [4 1]
 [0 

episode: 62   score: -108.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[4 3]
 [1 4]
 [3 1]
 [3 0]
 [1 0]
 [0 1]
 [0 4]
 [4 2]
 [0 3]
 [4 1]
 [2 4]
 [3 4]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 62   score: -76.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[3 0]
 [0 3]
 [0 1]
 [1 4]
 [0 0]]  Selected action  [3 0]
episode: 62   score: -46.0   memory length: 4096   epsilon: 0.8256063838824484
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 4]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 62   score: -75.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[2 1]
 [0 2]
 [1 4]
 [1 0]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 62   score: -71.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[0 2]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 62   score: -76.0   memory length: 4096   epsilon: 0.8256063838824484
(1, 2

episode: 62   score: -93.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[3 1]
 [2 0]
 [1 3]
 [4 3]
 [2 4]
 [1 2]
 [3 4]
 [0 4]
 [4 1]
 [0 2]
 [2 1]
 [0 0]]  Selected action  [3 1]
episode: 62   score: -96.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[2 0]
 [4 1]
 [0 3]
 [1 0]
 [4 2]
 [1 4]
 [2 1]
 [0 1]
 [2 3]
 [3 0]
 [2 4]
 [0 2]
 [0 4]
 [3 4]
 [4 3]
 [0 0]]  Selected action  [3 0]
episode: 62   score: -99.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[1 3]
 [0 0]]  Selected action  [1 3]
episode: 62   score: -102.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[4 1]
 [1 0]
 [4 0]
 [2 0]
 [2 1]
 [0 1]
 [4 3]
 [0 0]]  Selected action  [2 0]
episode: 62   score: -104.0   memory length: 4096   epsilon: 0.8256063838824484
Exploring Available actions  [[1 2]
 [4 1]
 [2 3]
 [0 0]]  Selected action  [4 1]
episode: 62   score: -84.0   memory len

episode: 63   score: -25.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[0 4]
 [2 1]
 [1 4]
 [4 0]
 [0 0]]  Selected action  [1 4]
episode: 63   score: -24.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[3 2]
 [2 4]
 [2 3]
 [0 2]
 [0 4]
 [2 1]
 [3 0]
 [0 3]
 [3 4]
 [0 1]
 [0 0]]  Selected action  [2 1]
episode: 63   score: -40.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[0 3]
 [1 3]
 [0 1]
 [0 2]
 [3 4]
 [2 1]
 [4 3]
 [1 0]
 [4 0]
 [4 2]
 [1 2]
 [4 1]
 [1 4]
 [2 3]
 [0 0]]  Selected action  [4 1]
episode: 63   score: -40.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[3 0]
 [1 0]
 [0 1]
 [0 3]
 [2 1]
 [4 0]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 63   score: -29.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[2 1]
 [4 1]
 [3 1]
 [4 0]
 [0 4]
 [1 3]
 [0 0]]  Selected action  [0 4]
episode: 63  

episode: 63   score: 31.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[4 3]
 [1 0]
 [2 4]
 [2 3]
 [3 2]
 [0 0]]  Selected action  [2 4]
episode: 63   score: 24.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[0 4]
 [1 0]
 [2 1]
 [1 4]
 [1 2]
 [0 2]
 [0 1]
 [4 2]
 [3 4]
 [0 0]]  Selected action  [4 2]
episode: 63   score: 32.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[4 2]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 63   score: 0.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[1 0]
 [0 2]
 [4 2]
 [1 3]
 [4 3]
 [0 4]
 [4 1]
 [1 4]
 [3 2]
 [0 1]
 [0 0]]  Selected action  [0 0]
episode: 63   score: -5.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[3 2]
 [3 4]
 [1 0]
 [1 3]
 [2 3]
 [0 2]
 [0 4]
 [3 0]
 [2 4]
 [4 1]
 [0 0]]  Selected action  [2 3]
episode: 63   score: 39.0   memory length: 4096   epsi

episode: 63   score: -136.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[1 4]
 [3 2]
 [3 1]
 [4 3]
 [3 4]
 [3 0]
 [2 4]
 [4 0]
 [0 1]
 [0 0]]  Selected action  [3 1]
episode: 63   score: -136.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[4 1]
 [2 4]
 [3 2]
 [3 0]
 [0 1]
 [1 2]
 [0 2]
 [1 0]
 [3 4]
 [2 0]
 [4 3]
 [1 3]
 [2 1]
 [2 3]
 [0 0]]  Selected action  [1 3]
episode: 63   score: -128.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[1 0]
 [4 3]
 [3 4]
 [0 4]
 [0 1]
 [1 4]
 [3 0]
 [0 0]]  Selected action  [0 4]
episode: 63   score: -140.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[2 1]
 [2 4]
 [0 3]
 [4 3]
 [3 4]
 [2 3]
 [0 2]
 [4 2]
 [2 0]
 [0 0]]  Selected action  [0 2]
episode: 63   score: -128.0   memory length: 4096   epsilon: 0.8230167348731386
Exploring Available actions  [[0 4]
 [1 2]
 [3 2]
 [1 3]
 [2 4]
 [0 0]]  Selected a

episode: 64   score: 62.0   memory length: 4096   epsilon: 0.8204352087200982
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [3 0]
 [3 2]
 [4 0]
 [3 1]
 [1 4]
 [1 2]
 [4 2]
 [1 0]
 [2 3]
 [0 0]]  Selected action  [0 4]
episode: 64   score: 59.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[0 2]
 [3 2]
 [4 3]
 [3 1]
 [2 4]
 [1 3]
 [0 0]]  Selected action  [0 2]
episode: 64   score: 72.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[4 2]
 [1 3]
 [2 3]
 [3 4]
 [1 2]
 [4 3]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 64   score: 67.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[3 0]
 [4 3]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 64   score: 99.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[4 0]
 [4 2]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 64   score: 94.0   memory len

episode: 64   score: 126.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[3 0]
 [2 3]
 [1 3]
 [0 3]
 [1 0]
 [2 0]
 [3 1]
 [0 0]]  Selected action  [1 3]
episode: 64   score: 130.0   memory length: 4096   epsilon: 0.8204352087200982
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [2 1]
 [0 1]
 [0 3]
 [3 2]
 [2 3]
 [0 2]
 [3 0]
 [1 0]
 [1 4]
 [4 0]
 [2 4]
 [0 0]]  Selected action  [1 3]
episode: 64   score: 129.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[4 0]
 [1 4]
 [3 4]
 [3 1]
 [0 0]]  Selected action  [3 4]
episode: 64   score: 145.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[1 4]
 [4 3]
 [2 4]
 [2 3]
 [3 1]
 [0 2]
 [3 0]
 [0 4]
 [1 3]
 [1 0]
 [0 3]
 [2 0]
 [0 1]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 64   score: 145.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[1 3]
 [0

episode: 64   score: 96.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[3 0]
 [0 3]
 [2 4]
 [1 2]
 [3 1]
 [1 4]
 [3 4]
 [4 0]
 [2 1]
 [4 1]
 [0 1]
 [1 0]
 [0 2]
 [1 3]
 [0 0]]  Selected action  [3 0]
episode: 64   score: 100.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[4 2]
 [1 2]
 [3 4]
 [3 2]
 [0 0]]  Selected action  [1 2]
episode: 64   score: 136.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[1 0]
 [3 4]
 [0 0]]  Selected action  [1 0]
episode: 64   score: 97.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[4 2]
 [0 0]]  Selected action  [0 0]
episode: 64   score: 92.0   memory length: 4096   epsilon: 0.8204352087200982
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 0]
episode: 64   score: 87.0   memory length: 4096   epsilon: 0.8204352087200982
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [0 0

episode: 65   score: -21.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[2 0]
 [3 1]
 [1 4]
 [4 0]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 65   score: -51.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[2 1]
 [3 0]
 [4 0]
 [2 3]
 [0 1]
 [0 3]
 [2 4]
 [0 4]
 [4 2]
 [1 3]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 65   score: -56.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[4 2]
 [1 0]
 [2 0]
 [2 3]
 [0 1]
 [3 4]
 [4 3]
 [2 1]
 [1 3]
 [0 0]]  Selected action  [1 0]
episode: 65   score: -44.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[4 3]
 [2 1]
 [0 0]]  Selected action  [2 1]
episode: 65   score: -34.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[4 3]
 [0 1]
 [4 1]
 [1 4]
 [2 1]
 [4 2]
 [2 3]
 [0 4]
 [0 0]]  Selected action  [4 3]
episode: 65   score: -45.0   memory length: 4096   epsi

episode: 65   score: 183.0   memory length: 4096   epsilon: 0.8178617799446645
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [0 0]]  Selected action  [2 1]
episode: 65   score: 164.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[1 2]
 [1 0]
 [0 4]
 [1 4]
 [0 1]
 [0 3]
 [0 2]
 [4 0]
 [3 1]
 [2 1]
 [2 0]
 [3 4]
 [3 2]
 [0 0]]  Selected action  [0 1]
episode: 65   score: 162.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[0 3]
 [4 0]
 [0 1]
 [0 4]
 [4 3]
 [4 1]
 [3 2]
 [2 4]
 [1 2]
 [2 1]
 [4 2]
 [2 0]
 [1 3]
 [3 1]
 [2 3]
 [0 0]]  Selected action  [4 0]
episode: 65   score: 184.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[0 4]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 65   score: 179.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 65   score: 174.0   memory length: 40

episode: 65   score: 76.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[2 1]
 [4 2]
 [1 3]
 [1 2]
 [1 4]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 65   score: 48.0   memory length: 4096   epsilon: 0.8178617799446645
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [0 3]
 [1 3]
 [4 2]
 [2 3]
 [2 0]
 [0 0]]  Selected action  [4 0]
episode: 65   score: 48.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [0 0]
episode: 65   score: 43.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[0 1]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 65   score: 50.0   memory length: 4096   epsilon: 0.8178617799446645
Exploring Available actions  [[3 2]
 [1 2]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 65   score: 45.0   memory length: 4096   epsilon: 0.8178617799446645
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploitin

Exploring Available actions  [[1 2]
 [0 0]]  Selected action  [1 2]
episode: 66   score: -112.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[4 0]
 [4 3]
 [4 1]
 [2 1]
 [0 2]
 [2 0]
 [0 0]]  Selected action  [4 0]
episode: 66   score: -106.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[3 1]
 [2 4]
 [4 3]
 [0 0]]  Selected action  [3 1]
episode: 66   score: -99.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[2 3]
 [3 0]
 [4 0]
 [1 2]
 [3 4]
 [0 3]
 [3 1]
 [0 0]]  Selected action  [1 2]
episode: 66   score: -63.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[2 0]
 [1 3]
 [3 1]
 [1 4]
 [1 0]
 [3 4]
 [0 0]]  Selected action  [1 0]
episode: 66   score: -102.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [3 0]
episode: 66   score: -120.0   memory length: 4096   epsilon: 0.81529

episode: 66   score: -20.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[3 4]
 [0 1]
 [1 2]
 [1 3]
 [4 1]
 [0 3]
 [4 3]
 [3 2]
 [2 0]
 [3 1]
 [1 0]
 [2 1]
 [2 3]
 [4 0]
 [0 0]]  Selected action  [2 0]
episode: 66   score: -16.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[4 3]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 66   score: -10.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[0 1]
 [3 2]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 66   score: -15.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[0 1]
 [4 1]
 [1 4]
 [1 2]
 [2 3]
 [4 3]
 [3 4]
 [1 3]
 [2 0]
 [0 0]]  Selected action  [2 3]
episode: 66   score: -5.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[1 0]
 [4 2]
 [3 1]
 [0 4]
 [4 1]
 [0 2]
 [2 3]
 [3 4]
 [2 4]
 [1 2]
 [0 0]]  Selected action  [2 4]
episode: 66   score: -10.0   memory

episode: 66   score: -63.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[4 3]
 [3 2]
 [4 2]
 [1 3]
 [0 0]]  Selected action  [3 2]
episode: 66   score: -63.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[1 3]
 [1 0]
 [4 3]
 [0 4]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 66   score: -68.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[3 4]
 [2 1]
 [4 0]
 [1 3]
 [0 0]]  Selected action  [1 3]
episode: 66   score: -88.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[0 3]
 [4 3]
 [2 3]
 [4 2]
 [3 1]
 [3 2]
 [1 0]
 [2 4]
 [4 0]
 [1 3]
 [1 2]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [4 2]
episode: 66   score: -105.0   memory length: 4096   epsilon: 0.8152964231480925
Exploring Available actions  [[1 3]
 [0 2]
 [0 4]
 [4 2]
 [2 0]
 [0 3]
 [0 0]]  Selected action  [2 0]
episode: 66   score: -93.0   memory length: 4096   epsilon: 0.8152964231480

episode: 67   score: 51.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[1 0]
 [1 2]
 [2 3]
 [2 0]
 [2 4]
 [3 0]
 [0 0]]  Selected action  [2 3]
episode: 67   score: 75.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[2 0]
 [1 2]
 [4 2]
 [1 4]
 [1 3]
 [4 3]
 [4 1]
 [1 0]
 [0 3]
 [3 1]
 [0 1]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 67   score: 70.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[0 2]
 [3 1]
 [0 3]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 67   score: 35.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[2 4]
 [0 3]
 [4 2]
 [3 0]
 [3 4]
 [1 3]
 [0 2]
 [0 4]
 [3 1]
 [4 3]
 [3 2]
 [1 4]
 [2 1]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 67   score: 36.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[4 3]
 [3 4]
 [4 1]
 [4 2]
 [2 3]
 [0 4]
 [4 0]
 [0 0]]  Selected action  [4 0]
epis

episode: 67   score: 106.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[3 2]
 [0 3]
 [4 0]
 [3 4]
 [1 4]
 [1 3]
 [4 3]
 [1 0]
 [4 1]
 [2 0]
 [2 1]
 [1 2]
 [0 0]]  Selected action  [1 3]
episode: 67   score: 110.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[1 2]
 [2 4]
 [2 1]
 [1 0]
 [0 3]
 [3 1]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 67   score: 105.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[1 3]
 [2 0]
 [1 0]
 [4 2]
 [3 0]
 [3 2]
 [0 2]
 [4 1]
 [4 0]
 [1 2]
 [0 0]]  Selected action  [4 0]
episode: 67   score: 79.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[3 4]
 [1 4]
 [2 1]
 [0 0]]  Selected action  [2 1]
episode: 67   score: 92.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[2 3]
 [1 3]
 [0 1]
 [1 0]
 [4 1]
 [0 3]
 [1 4]
 [2 1]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 67   s

episode: 67   score: -9.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [0 0]
episode: 67   score: -14.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 67   score: 2.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[3 0]
 [4 3]
 [1 4]
 [1 3]
 [2 1]
 [4 0]
 [0 0]]  Selected action  [4 0]
episode: 67   score: -4.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[2 3]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 67   score: -9.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[3 4]
 [3 0]
 [1 4]
 [0 0]]  Selected action  [3 4]
episode: 67   score: -16.0   memory length: 4096   epsilon: 0.8127391130113046
Exploring Available actions  [[4 1]
 [1 0]
 [1 4]
 [1 2]
 [4 3]
 [4 0]
 [3 1]
 [4 2]
 [0 0]]  Selected action  [1 2]
episode: 67   scor

episode: 68   score: -135.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[4 3]
 [1 0]
 [0 3]
 [0 0]]  Selected action  [1 0]
episode: 68   score: -158.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[0 4]
 [3 1]
 [3 2]
 [2 3]
 [0 0]]  Selected action  [3 2]
episode: 68   score: -139.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[0 1]
 [2 0]
 [4 3]
 [1 0]
 [1 4]
 [0 0]]  Selected action  [1 0]
episode: 68   score: -164.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[2 3]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 68   score: -160.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[0 2]
 [1 4]
 [4 2]
 [4 0]
 [3 1]
 [3 2]
 [3 0]
 [0 4]
 [2 3]
 [2 4]
 [0 0]]  Selected action  [3 2]
episode: 68   score: -134.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[3 2]
 [3 0]
 [0 

episode: 68   score: -320.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[2 3]
 [4 1]
 [1 4]
 [4 3]
 [2 4]
 [0 3]
 [4 0]
 [0 0]]  Selected action  [4 1]
episode: 68   score: -320.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[1 4]
 [4 1]
 [4 2]
 [3 0]
 [4 3]
 [0 2]
 [3 1]
 [0 3]
 [0 4]
 [4 0]
 [3 4]
 [2 0]
 [2 3]
 [0 0]]  Selected action  [4 2]
episode: 68   score: -320.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[0 4]
 [2 4]
 [0 1]
 [0 0]]  Selected action  [0 4]
episode: 68   score: -336.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[3 0]
 [0 3]
 [2 1]
 [4 3]
 [1 2]
 [0 0]]  Selected action  [3 0]
episode: 68   score: -330.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[4 3]
 [0 0]]  Selected action  [4 3]
episode: 68   score: -314.0   memory length: 4096   epsilon: 0.8101898242946409
(1, 21) [0. 

episode: 68   score: -288.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[0 4]
 [0 3]
 [2 0]
 [4 0]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 68   score: -293.0   memory length: 4096   epsilon: 0.8101898242946409
Exploring Available actions  [[3 1]
 [0 4]
 [0 2]
 [1 3]
 [4 0]
 [1 2]
 [0 0]]  Selected action  [0 4]
episode: 68   score: -319.0   memory length: 4096   epsilon: 0.8101898242946409
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [3 0]
 [4 3]
 [1 2]
 [0 2]
 [4 0]
 [2 3]
 [4 1]
 [0 0]]  Selected action  [4 2]
episode: 68   score: -299.0   memory length: 4096   epsilon: 0.8101898242946409
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [2 3]
 [0 1]
 [2 4]
 [4 3]
 [0 0]]  Selected action  [2 1]
episode: 68   score: -287.0   memory length: 4096   epsilon: 0.8101898242946409
Episode : 69
Exploring Available actions  [[1 2]
 [2 1]
 [0 3]
 [0 2]
 [1 4]
 [4 3]
 [0 

episode: 69   score: -143.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[3 1]
 [2 4]
 [1 2]
 [1 3]
 [0 0]]  Selected action  [3 1]
episode: 69   score: -146.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[2 0]
 [1 3]
 [4 3]
 [0 2]
 [0 4]
 [3 0]
 [3 2]
 [0 1]
 [1 2]
 [4 0]
 [0 0]]  Selected action  [0 1]
episode: 69   score: -149.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[0 4]
 [0 1]
 [2 0]
 [2 4]
 [1 2]
 [0 2]
 [4 1]
 [3 4]
 [0 3]
 [4 2]
 [2 1]
 [1 4]
 [4 3]
 [0 0]]  Selected action  [4 1]
episode: 69   score: -149.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[0 1]
 [4 1]
 [4 3]
 [0 4]
 [2 4]
 [3 1]
 [1 0]
 [3 2]
 [3 0]
 [4 2]
 [1 4]
 [0 2]
 [3 4]
 [2 1]
 [1 2]
 [0 0]]  Selected action  [4 2]
episode: 69   score: -149.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[2 1]
 [4 1]
 [1 4]
 [0 3]
 [0 0

episode: 69   score: -28.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[1 3]
 [1 2]
 [3 1]
 [2 1]
 [0 4]
 [0 0]]  Selected action  [1 2]
episode: 69   score: 4.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[4 2]
 [3 1]
 [2 3]
 [0 2]
 [3 4]
 [0 0]]  Selected action  [2 3]
episode: 69   score: 12.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[2 0]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 69   score: 7.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[3 0]
 [2 1]
 [0 3]
 [1 2]
 [2 3]
 [1 0]
 [3 2]
 [1 4]
 [0 4]
 [0 1]
 [2 4]
 [0 0]]  Selected action  [0 1]
episode: 69   score: 8.0   memory length: 4096   epsilon: 0.8076485318376094
Exploring Available actions  [[1 4]
 [3 4]
 [3 0]
 [4 0]
 [3 1]
 [1 0]
 [1 3]
 [4 1]
 [2 1]
 [0 3]
 [4 3]
 [2 3]
 [2 0]
 [0 1]
 [1 2]
 [0 0]]  Selected action  [2 0]
episode: 69   score: 11.0   memory length

episode: 70   score: -22.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[2 1]
 [4 3]
 [3 4]
 [2 3]
 [3 1]
 [3 2]
 [4 0]
 [1 4]
 [0 0]]  Selected action  [3 1]
episode: 70   score: -16.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[2 0]
 [0 4]
 [4 0]
 [0 3]
 [0 2]
 [2 1]
 [4 1]
 [0 1]
 [3 0]
 [4 2]
 [1 0]
 [1 4]
 [3 1]
 [3 4]
 [0 0]]  Selected action  [0 0]
episode: 70   score: -21.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[0 3]
 [4 1]
 [1 0]
 [2 3]
 [4 3]
 [0 1]
 [1 4]
 [0 0]]  Selected action  [0 0]
episode: 70   score: -26.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[3 1]
 [0 4]
 [1 0]
 [3 2]
 [4 3]
 [4 0]
 [2 3]
 [1 3]
 [4 2]
 [4 1]
 [3 4]
 [0 0]]  Selected action  [4 0]
episode: 70   score: -57.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[2 1]
 [4 0]
 [1 0]
 [0 0]]  Selected action  [2 1]

episode: 70   score: -121.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[2 1]
 [0 3]
 [3 0]
 [3 4]
 [2 4]
 [2 3]
 [1 3]
 [1 0]
 [4 3]
 [4 1]
 [1 4]
 [3 1]
 [0 0]]  Selected action  [0 0]
episode: 70   score: -126.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[1 0]
 [0 3]
 [2 0]
 [4 0]
 [3 1]
 [4 2]
 [3 4]
 [0 2]
 [3 2]
 [0 0]]  Selected action  [3 1]
episode: 70   score: -127.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[1 2]
 [4 3]
 [4 1]
 [0 4]
 [3 4]
 [1 3]
 [3 0]
 [2 4]
 [4 0]
 [0 1]
 [0 3]
 [0 2]
 [3 1]
 [2 3]
 [2 1]
 [0 0]]  Selected action  [2 1]
episode: 70   score: -139.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[4 0]
 [1 4]
 [0 3]
 [0 2]
 [3 4]
 [4 3]
 [1 2]
 [3 2]
 [0 1]
 [3 1]
 [0 4]
 [0 0]]  Selected action  [3 2]
episode: 70   score: -122.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions

episode: 70   score: -131.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[3 4]
 [2 1]
 [2 0]
 [0 3]
 [4 2]
 [2 4]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 70   score: -136.0   memory length: 4096   epsilon: 0.8051152105586383
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [2 4]
 [4 2]
 [0 3]
 [4 3]
 [2 3]
 [0 0]]  Selected action  [0 2]
episode: 70   score: -133.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[1 0]
 [1 2]
 [0 4]
 [3 2]
 [3 1]
 [0 0]]  Selected action  [0 4]
episode: 70   score: -139.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[2 4]
 [0 3]
 [3 4]
 [2 0]
 [1 0]
 [4 1]
 [4 2]
 [0 2]
 [0 0]]  Selected action  [1 0]
episode: 70   score: -123.0   memory length: 4096   epsilon: 0.8051152105586383
Exploring Available actions  [[4 2]
 [0 2]
 [0 3]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 70   score: -128.0   memory len

episode: 71   score: -92.0   memory length: 4096   epsilon: 0.8025898354548281
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[3 0]
 [0 0]]  Selected action  [3 0]
episode: 71   score: -88.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[0 2]
 [3 0]
 [3 1]
 [2 0]
 [0 0]]  Selected action  [3 0]
episode: 71   score: -92.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[0 4]
 [2 1]
 [1 2]
 [4 2]
 [0 1]
 [0 0]]  Selected action  [4 2]
episode: 71   score: -97.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[4 2]
 [4 0]
 [3 0]
 [1 2]
 [0 0]]  Selected action  [0 0]
episode: 71   score: -102.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[0 1]
 [0 3]
 [4 3]
 [4 2]
 [0 0]]  Selected action  [0 3]
episode: 71   score: -103.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[2 4]
 [0 2]
 [0 3]
 [1 3]
 [4 0]

episode: 71   score: -171.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[0 3]
 [3 0]
 [0 4]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 71   score: -176.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 2]
episode: 71   score: -179.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[1 3]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 71   score: -201.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[4 2]
 [0 3]
 [0 4]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 71   score: -231.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[3 1]
 [1 4]
 [2 3]
 [0 4]
 [0 3]
 [0 0]]  Selected action  [3 1]
episode: 71   score: -227.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[2 0]
 [2 4]
 [0 3]
 [3 0]
 [4 2]
 [4 3]
 [0 1]
 [4 0]
 [4 1]
 [0 0]]  Se

episode: 71   score: -216.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[4 3]
 [3 2]
 [2 3]
 [1 2]
 [0 2]
 [2 4]
 [3 1]
 [0 3]
 [1 4]
 [0 4]
 [4 1]
 [4 2]
 [3 4]
 [0 1]
 [0 0]]  Selected action  [3 4]
episode: 71   score: -218.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[4 3]
 [0 4]
 [1 4]
 [0 1]
 [3 1]
 [3 0]
 [4 2]
 [0 0]]  Selected action  [3 0]
episode: 71   score: -224.0   memory length: 4096   epsilon: 0.8025898354548281
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [0 4]
 [0 0]]  Selected action  [2 4]
episode: 71   score: -245.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[3 2]
 [0 3]
 [1 0]
 [3 4]
 [0 2]
 [3 1]
 [1 2]
 [4 3]
 [0 0]]  Selected action  [0 3]
episode: 71   score: -228.0   memory length: 4096   epsilon: 0.8025898354548281
Exploring Available actions  [[4 3]
 [1 4]
 [4 0]
 [2 0]
 [3 2]
 [1 2]
 [3 1]
 [1 0]
 [0 4]
 [0 3]
 [0 0]]

episode: 72   score: -234.0   memory length: 4096   epsilon: 0.8000723816017052
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 4]
 [1 3]
 [1 0]
 [4 3]
 [0 0]]  Selected action  [3 4]
episode: 72   score: -240.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[0 3]
 [4 1]
 [2 0]
 [4 2]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 72   score: -243.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[1 0]
 [0 3]
 [2 1]
 [4 2]
 [3 1]
 [4 1]
 [1 3]
 [4 0]
 [1 4]
 [3 4]
 [4 3]
 [0 0]]  Selected action  [2 1]
episode: 72   score: -279.0   memory length: 4096   epsilon: 0.8000723816017052
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [2 1]
 [2 0]
 [3 1]
 [3 4]
 [1 0]
 [4 3]
 [4 1]
 [4 2]
 [0 0]]  Selected action  [4 0]
episode: 72   score: -277.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[2 0]
 [4 2]
 [3 2]
 [0 0]]  S

episode: 72   score: -314.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[1 0]
 [0 3]
 [0 4]
 [1 4]
 [2 0]
 [4 0]
 [0 0]]  Selected action  [1 0]
episode: 72   score: -306.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[1 3]
 [0 0]]  Selected action  [1 3]
episode: 72   score: -316.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[3 2]
 [4 3]
 [0 1]
 [4 1]
 [0 0]]  Selected action  [3 2]
episode: 72   score: -308.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[1 4]
 [2 0]
 [4 0]
 [0 3]
 [1 2]
 [1 3]
 [0 0]]  Selected action  [2 0]
episode: 72   score: -288.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 0]
episode: 72   score: -293.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[4 0]
 [0 0]]  Selected action  [4 0]
episode: 72   score: 

episode: 72   score: -312.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[1 0]
 [0 0]]  Selected action  [0 0]
episode: 72   score: -317.0   memory length: 4096   epsilon: 0.8000723816017052
Exploring Available actions  [[4 1]
 [1 0]
 [2 1]
 [1 2]
 [0 0]]  Selected action  [2 1]
episode: 72   score: -323.0   memory length: 4096   epsilon: 0.8000723816017052
Episode : 73
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[1 2]
 [0 0]]  Selected action  [1 2]
episode: 73   score: 25.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[0 3]
 [4 1]
 [1 4]
 [0 1]
 [3 1]
 [0 0]]  Selected action  [0 0]
episode: 73   score: 20.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[3 1]
 [2 1]
 [4 2]
 [2 0]
 [1 3]
 [0 0]]  Selected action  [2 0]
episode: 73   score: 48.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1

episode: 73   score: 123.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[4 0]
 [1 3]
 [0 0]]  Selected action  [4 0]
episode: 73   score: 141.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [0 0]
episode: 73   score: 136.0   memory length: 4096   epsilon: 0.7975628241529755
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [0 2]
 [0 0]]  Selected action  [4 0]
episode: 73   score: 136.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[2 1]
 [0 0]]  Selected action  [2 1]
episode: 73   score: 149.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[0 1]
 [2 3]
 [3 2]
 [2 1]
 [4 3]
 [1 3]
 [0 4]
 [3 0]
 [4 1]
 [3 4]
 [4 0]
 [1 4]
 [2 4]
 [1 2]
 [0 0]]  Selected action  [2 4]
episode: 73   score: 148.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[2 3]
 [0 3]
 [2 0

episode: 73   score: -23.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[1 4]
 [4 1]
 [0 0]]  Selected action  [0 0]
episode: 73   score: -28.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 3]
episode: 73   score: -30.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[1 4]
 [3 4]
 [3 2]
 [0 0]]  Selected action  [0 0]
episode: 73   score: -35.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[4 2]
 [1 3]
 [2 4]
 [0 3]
 [2 3]
 [4 1]
 [0 1]
 [4 3]
 [1 4]
 [0 2]
 [2 1]
 [3 4]
 [0 0]]  Selected action  [4 1]
episode: 73   score: -76.0   memory length: 4096   epsilon: 0.7975628241529755
Exploring Available actions  [[0 4]
 [1 3]
 [0 3]
 [4 0]
 [0 1]
 [2 0]
 [2 3]
 [3 2]
 [1 2]
 [0 0]]  Selected action  [1 2]
episode: 73   score: -52.0   memory length: 4096   epsilon: 0.7975628241529755
(1, 21) [0. 0. 0.] Index  0 0.0


episode: 74   score: 13.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[1 0]
 [1 2]
 [4 0]
 [3 4]
 [4 1]
 [0 3]
 [4 2]
 [1 4]
 [1 3]
 [0 4]
 [0 2]
 [2 1]
 [2 4]
 [3 1]
 [0 0]]  Selected action  [1 0]
episode: 74   score: 49.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [0 0]
episode: 74   score: 44.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [0 0]
episode: 74   score: 39.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[4 1]
 [0 0]]  Selected action  [0 0]
episode: 74   score: 34.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[3 2]
 [0 4]
 [0 0]]  Selected action  [3 2]
episode: 74   score: 60.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[2 0]
 [4 1]
 [3 1]
 [1 3]
 [4 3]
 [2 4]
 [0 0]]  Selected actio

episode: 74   score: -39.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[1 0]
 [4 1]
 [0 4]
 [3 2]
 [0 1]
 [2 3]
 [2 0]
 [4 2]
 [3 4]
 [1 2]
 [3 1]
 [4 0]
 [0 0]]  Selected action  [0 1]
episode: 74   score: -74.0   memory length: 4096   epsilon: 0.7950611383402794
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [3 2]
 [4 1]
 [0 4]
 [4 2]
 [2 1]
 [4 0]
 [3 0]
 [4 3]
 [0 0]]  Selected action  [0 3]
episode: 74   score: -58.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[1 3]
 [2 1]
 [0 1]
 [0 4]
 [2 4]
 [4 1]
 [1 2]
 [2 0]
 [2 3]
 [0 0]]  Selected action  [0 0]
episode: 74   score: -63.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[1 4]
 [3 0]
 [4 0]
 [0 1]
 [1 0]
 [3 4]
 [2 4]
 [0 0]]  Selected action  [1 0]
episode: 74   score: -63.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[0 2]
 [0 0]]  Sel

episode: 74   score: 13.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[3 0]
 [2 4]
 [0 4]
 [0 0]]  Selected action  [0 0]
episode: 74   score: 8.0   memory length: 4096   epsilon: 0.7950611383402794
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [3 4]
 [0 3]
 [0 0]]  Selected action  [1 0]
episode: 74   score: 27.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[3 2]
 [0 0]]  Selected action  [3 2]
episode: 74   score: 33.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[0 1]
 [2 3]
 [0 0]]  Selected action  [0 1]
episode: 74   score: 46.0   memory length: 4096   epsilon: 0.7950611383402794
Exploring Available actions  [[4 3]
 [1 3]
 [2 3]
 [2 4]
 [3 4]
 [0 2]
 [3 2]
 [0 3]
 [1 2]
 [0 4]
 [4 0]
 [2 1]
 [4 2]
 [1 4]
 [3 0]
 [0 0]]  Selected action  [1 4]
episode: 74   score: 46.0   memory length: 4096   epsilon: 0.7950611383402794
(1, 21) [0. 0. 0. 0. 0. 

episode: 75   score: 24.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[1 4]
 [3 2]
 [1 2]
 [3 1]
 [2 3]
 [1 0]
 [0 1]
 [0 3]
 [1 3]
 [4 2]
 [2 0]
 [0 0]]  Selected action  [4 2]
episode: 75   score: 17.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [0 0]
episode: 75   score: 12.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[2 1]
 [2 3]
 [0 4]
 [0 0]]  Selected action  [0 4]
episode: 75   score: 26.0   memory length: 4096   epsilon: 0.7925672994729471
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [4 3]
 [4 2]
 [3 1]
 [2 0]
 [0 0]]  Selected action  [4 0]
episode: 75   score: 30.0   memory length: 4096   epsilon: 0.7925672994729471
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 0]
 [1 0]
 [0 0]]  Selected action  [2 0]
episode: 75   score: 24.0   memory length: 4096   epsilon: 0.792567299472947

(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [4 3]
 [0 4]
 [1 3]
 [0 0]]  Selected action  [2 4]
episode: 75   score: 40.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[1 2]
 [2 3]
 [0 1]
 [2 0]
 [4 2]
 [0 3]
 [1 3]
 [3 0]
 [2 1]
 [0 0]]  Selected action  [2 3]
episode: 75   score: 72.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[1 0]
 [2 1]
 [0 2]
 [1 3]
 [1 2]
 [1 4]
 [0 0]]  Selected action  [2 1]
episode: 75   score: 82.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[3 0]
 [4 1]
 [3 2]
 [1 4]
 [0 1]
 [1 0]
 [4 2]
 [4 3]
 [2 4]
 [3 1]
 [1 2]
 [1 3]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 75   score: 84.0   memory length: 4096   epsilon: 0.7925672994729471
(1, 21) [ 0.       0.      59.79658  0.       0.       0.       0.       0.     ] Index  2 59.79658
Exploiting Available actions  [[2 3]
 [4 2]
 [2 0]
 [0 4]
 [3 1]
 [4 3]
 [

episode: 75   score: 173.0   memory length: 4096   epsilon: 0.7925672994729471
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [4 2]
 [1 0]
 [3 2]
 [3 0]
 [3 1]
 [0 2]
 [0 1]
 [4 3]
 [3 4]
 [2 4]
 [0 0]]  Selected action  [0 3]
episode: 75   score: 136.0   memory length: 4096   epsilon: 0.7925672994729471
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 1]
 [0 3]
 [0 2]
 [3 0]
 [4 2]
 [2 3]
 [3 2]
 [3 4]
 [0 1]
 [1 4]
 [0 4]
 [0 0]]  Selected action  [3 1]
episode: 75   score: 140.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[3 0]
 [2 3]
 [2 1]
 [3 1]
 [0 4]
 [4 3]
 [0 1]
 [1 4]
 [1 0]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 75   score: 135.0   memory length: 4096   epsilon: 0.7925672994729471
Exploring Available actions  [[0 4]
 [0 1]
 [2 0]
 [2 4]
 [0 3]
 [4 1]
 [3 0]
 [3 4]
 [2 1]
 [1 3]
 [0 2]
 [1 2]
 [4 3]
 [3 2]
 [0 0]]  Selected action  [0 2

episode: 76   score: -28.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 0]
episode: 76   score: -33.0   memory length: 4096   epsilon: 0.7900812829377555
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[1 4]
 [0 0]]  Selected action  [1 4]
episode: 76   score: -68.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[1 0]
 [4 1]
 [4 0]
 [3 4]
 [1 2]
 [0 0]]  Selected action  [1 0]
episode: 76   score: -40.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[2 4]
 [1 2]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 76   score: -43.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[4 0]
 [0 0]]  Selected action  [0 0]
episode: 76   score: -48.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[2 1]
 [0 1]
 [3 2]
 [0 0]]  Selected action  [2 1]
episode: 76   score: -57.0   mem

episode: 76   score: -183.0   memory length: 4096   epsilon: 0.7900812829377555
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [4 2]
 [0 1]
 [0 2]
 [3 1]
 [2 1]
 [1 2]
 [0 3]
 [2 0]
 [0 0]]  Selected action  [0 4]
episode: 76   score: -194.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[4 1]
 [3 1]
 [1 2]
 [1 4]
 [0 2]
 [1 0]
 [3 4]
 [2 3]
 [4 0]
 [0 4]
 [4 3]
 [0 0]]  Selected action  [1 4]
episode: 76   score: -198.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[0 2]
 [4 2]
 [4 1]
 [3 0]
 [1 2]
 [4 3]
 [2 0]
 [3 2]
 [2 3]
 [0 0]]  Selected action  [4 1]
episode: 76   score: -186.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[4 2]
 [4 3]
 [1 3]
 [4 1]
 [3 0]
 [1 4]
 [4 0]
 [0 2]
 [0 4]
 [3 4]
 [0 0]]  Selected action  [3 4]
episode: 76   score: -155.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  

episode: 76   score: -64.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[2 4]
 [4 2]
 [4 0]
 [3 2]
 [2 0]
 [4 3]
 [1 3]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 76   score: -72.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[3 2]
 [3 0]
 [2 1]
 [3 1]
 [0 4]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 76   score: -77.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[4 3]
 [1 2]
 [0 2]
 [1 4]
 [3 2]
 [0 1]
 [4 0]
 [4 2]
 [3 1]
 [2 1]
 [1 3]
 [0 4]
 [3 0]
 [4 1]
 [3 4]
 [0 0]]  Selected action  [4 3]
episode: 76   score: -74.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[0 1]
 [3 1]
 [4 0]
 [0 2]
 [4 1]
 [1 2]
 [2 0]
 [0 0]]  Selected action  [0 1]
episode: 76   score: -74.0   memory length: 4096   epsilon: 0.7900812829377555
Exploring Available actions  [[0 2]
 [4 1]
 [2 0]
 [3 0]
 [4 0]
 [4 3]
 [0 3]
 [1 0]
 [3 1]
 [0 0]]  Selected

episode: 77   score: -52.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[1 4]
 [4 0]
 [0 4]
 [3 1]
 [2 0]
 [0 0]]  Selected action  [0 0]
episode: 77   score: -57.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[4 2]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 77   score: -62.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[1 0]
 [4 1]
 [0 0]]  Selected action  [1 0]
episode: 77   score: -55.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[1 0]
 [1 2]
 [0 0]]  Selected action  [1 0]
episode: 77   score: -42.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[1 3]
 [2 4]
 [3 4]
 [0 0]]  Selected action  [0 0]
episode: 77   score: -47.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[2 1]
 [2 4]
 [0 3]
 [0 0]]  Selected action  [2 1]
episode: 77   score: -38.0   memory lengt

episode: 77   score: 21.0   memory length: 4096   epsilon: 0.7876030641986846
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [1 3]
 [0 2]
 [4 1]
 [3 4]
 [0 0]]  Selected action  [0 4]
episode: 77   score: 30.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[4 0]
 [0 2]
 [2 4]
 [2 1]
 [0 3]
 [0 4]
 [0 1]
 [1 4]
 [0 0]]  Selected action  [2 1]
episode: 77   score: 53.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[4 0]
 [0 4]
 [0 1]
 [2 0]
 [1 3]
 [4 1]
 [0 0]]  Selected action  [0 1]
episode: 77   score: 50.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[1 0]
 [4 1]
 [4 2]
 [1 4]
 [0 2]
 [1 3]
 [1 2]
 [3 2]
 [2 3]
 [0 4]
 [4 3]
 [0 0]]  Selected action  [2 3]
episode: 77   score: 44.0   memory length: 4096   epsilon: 0.7876030641986846
Exploring Available actions  [[4 0]
 [2 3]
 [4 1]
 [2 0]
 [3 2]
 [3 0]
 [1 2]
 [1 4]
 [3 4]
 [0 0]]  Selected acti

episode: 78   score: -113.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 78   score: -118.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [2 0]
episode: 78   score: -115.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[0 3]
 [2 0]
 [0 0]]  Selected action  [0 0]
episode: 78   score: -120.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[0 2]
 [0 1]
 [0 0]]  Selected action  [0 0]
episode: 78   score: -125.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[0 4]
 [0 0]]  Selected action  [0 0]
episode: 78   score: -130.0   memory length: 4096   epsilon: 0.7851326187966758
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 4]
 [1 2]
 [0 0]]  Selected action  [1 4]
episode: 78   score: -152.0   memory length: 4096   epsilo

episode: 78   score: -285.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[2 1]
 [0 3]
 [3 0]
 [1 3]
 [4 0]
 [0 2]
 [4 3]
 [4 2]
 [3 4]
 [4 1]
 [2 4]
 [1 0]
 [0 1]
 [2 3]
 [1 2]
 [0 0]]  Selected action  [3 4]
episode: 78   score: -266.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[4 3]
 [0 2]
 [1 4]
 [2 3]
 [4 1]
 [0 3]
 [1 2]
 [2 1]
 [0 1]
 [2 0]
 [0 0]]  Selected action  [0 1]
episode: 78   score: -239.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[2 4]
 [3 4]
 [3 0]
 [4 1]
 [2 1]
 [2 0]
 [1 4]
 [2 3]
 [0 0]]  Selected action  [3 0]
episode: 78   score: -232.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[4 1]
 [0 0]]  Selected action  [4 1]
episode: 78   score: -247.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[2 1]
 [4 1]
 [3 1]
 [1 2]
 [1 4]
 [3 0]
 [4 2]
 [2 3]
 [0 1]
 [2 4]
 [0 0]]  Selected a

episode: 78   score: -186.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[3 2]
 [3 4]
 [4 3]
 [3 1]
 [1 0]
 [0 2]
 [1 4]
 [0 0]]  Selected action  [4 3]
episode: 78   score: -150.0   memory length: 4096   epsilon: 0.7851326187966758
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [3 2]
 [2 4]
 [0 1]
 [0 2]
 [2 3]
 [4 3]
 [4 1]
 [3 0]
 [1 3]
 [3 1]
 [0 4]
 [4 2]
 [0 0]]  Selected action  [4 0]
episode: 78   score: -171.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[4 2]
 [0 4]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 78   score: -176.0   memory length: 4096   epsilon: 0.7851326187966758
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 4]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 78   score: -211.0   memory length: 4096   epsilon: 0.7851326187966758
Exploring Available actions  [[3 1]
 [0 3]
 [3 4]
 [1 4]
 [0 0]]  Selected action 

episode: 79   score: -96.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[1 4]
 [0 0]]  Selected action  [0 0]
episode: 79   score: -101.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[3 4]
 [0 0]]  Selected action  [3 4]
episode: 79   score: -117.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[0 3]
 [2 3]
 [4 0]
 [0 2]
 [3 2]
 [0 4]
 [4 2]
 [1 4]
 [2 4]
 [0 1]
 [1 3]
 [3 0]
 [4 3]
 [0 0]]  Selected action  [1 4]
episode: 79   score: -117.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[4 1]
 [2 1]
 [3 2]
 [2 3]
 [0 2]
 [4 0]
 [0 4]
 [4 2]
 [1 0]
 [0 0]]  Selected action  [4 0]
episode: 79   score: -113.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 79   score: -118.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[3 4]
 [0 0

episode: 79   score: -99.0   memory length: 4096   epsilon: 0.7826699223493903
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 2]
 [2 0]
 [0 4]
 [3 1]
 [4 0]
 [0 1]
 [2 4]
 [1 3]
 [0 0]]  Selected action  [1 2]
episode: 79   score: -76.0   memory length: 4096   epsilon: 0.7826699223493903
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [4 1]
 [2 3]
 [2 4]
 [1 0]
 [0 0]]  Selected action  [4 3]
episode: 79   score: -78.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[3 4]
 [3 0]
 [0 2]
 [2 4]
 [1 4]
 [2 1]
 [0 1]
 [1 3]
 [0 0]]  Selected action  [2 1]
episode: 79   score: -63.0   memory length: 4096   epsilon: 0.7826699223493903
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [1 0]
 [2 4]
 [4 2]
 [1 3]
 [4 1]
 [0 4]
 [2 0]
 [0 3]
 [0 0]]  Selected action  [0 2]
episode: 79   score: -53.0   memory length: 4096   epsilon: 0.7826699223493903
Expl

episode: 79   score: -41.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[1 3]
 [3 4]
 [4 2]
 [4 1]
 [2 3]
 [4 0]
 [0 0]]  Selected action  [4 2]
episode: 79   score: -41.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[3 2]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 79   score: -46.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[2 4]
 [1 0]
 [1 2]
 [2 3]
 [0 3]
 [0 0]]  Selected action  [2 3]
episode: 79   score: -14.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[0 4]
 [1 0]
 [3 2]
 [1 2]
 [2 0]
 [3 0]
 [0 0]]  Selected action  [2 0]
episode: 79   score: -11.0   memory length: 4096   epsilon: 0.7826699223493903
Exploring Available actions  [[4 1]
 [4 0]
 [0 0]]  Selected action  [0 0]
episode: 79   score: -16.0   memory length: 4096   epsilon: 0.7826699223493903
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [0 0]] 

episode: 80   score: 37.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[3 1]
 [1 2]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 80   score: 32.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[0 2]
 [1 0]
 [0 0]]  Selected action  [0 0]
episode: 80   score: 27.0   memory length: 4096   epsilon: 0.7802149505509681
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 0]
 [1 2]
 [0 0]]  Selected action  [1 0]
episode: 80   score: 25.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[1 2]
 [2 0]
 [3 2]
 [2 3]
 [0 0]]  Selected action  [2 0]
episode: 80   score: 22.0   memory length: 4096   epsilon: 0.7802149505509681
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [0 0]]  Selected action  [4 0]
episode: 80   score: 22.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[3 0]
 [4 2]
 [4 3]
 [0 0]]  Selected action  [4 3

episode: 80   score: -54.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[0 4]
 [3 2]
 [3 1]
 [2 4]
 [1 4]
 [3 0]
 [0 3]
 [1 2]
 [0 1]
 [4 1]
 [0 0]]  Selected action  [0 3]
episode: 80   score: -35.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[0 1]
 [2 3]
 [1 2]
 [3 1]
 [0 4]
 [2 0]
 [4 1]
 [4 2]
 [1 4]
 [4 3]
 [3 0]
 [0 3]
 [1 3]
 [0 2]
 [0 0]]  Selected action  [1 4]
episode: 80   score: -37.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[2 4]
 [0 1]
 [3 1]
 [1 2]
 [4 3]
 [1 3]
 [0 0]]  Selected action  [0 1]
episode: 80   score: -14.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[3 4]
 [4 2]
 [3 0]
 [1 0]
 [0 1]
 [1 4]
 [2 4]
 [2 1]
 [0 0]]  Selected action  [0 1]
episode: 80   score: -17.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[3 1]
 [2 1]
 [3 0]
 [1 2]
 [0 3]
 [2 3]
 [0 4]
 [2 0]
 [4 3]
 [4

episode: 80   score: -26.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[2 4]
 [1 0]
 [0 0]]  Selected action  [1 0]
episode: 80   score: -29.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 80   score: -17.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[2 1]
 [4 0]
 [2 3]
 [4 1]
 [0 4]
 [3 2]
 [3 4]
 [2 0]
 [2 4]
 [3 1]
 [1 2]
 [3 0]
 [0 0]]  Selected action  [4 0]
episode: 80   score: -5.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[3 4]
 [0 0]]  Selected action  [0 0]
episode: 80   score: -10.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[3 2]
 [0 1]
 [4 3]
 [0 0]]  Selected action  [4 3]
episode: 80   score: -17.0   memory length: 4096   epsilon: 0.7802149505509681
Exploring Available actions  [[4 3]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 80   

episode: 81   score: 79.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[1 3]
 [3 4]
 [3 1]
 [1 2]
 [3 2]
 [1 4]
 [4 1]
 [1 0]
 [2 1]
 [0 4]
 [2 4]
 [4 3]
 [0 2]
 [4 0]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 81   score: 74.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[3 4]
 [4 3]
 [2 1]
 [0 0]]  Selected action  [2 1]
episode: 81   score: 38.0   memory length: 4096   epsilon: 0.777767679171789
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 0]
 [1 3]
 [3 0]
 [2 1]
 [4 1]
 [3 2]
 [4 2]
 [0 2]
 [1 0]
 [1 4]
 [0 3]
 [0 0]]  Selected action  [2 0]
episode: 81   score: 45.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[0 2]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 81   score: 52.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[3 2]
 [2 1]
 [1 4]
 [4 0]
 [3 0]
 [1 2]
 [0 1]
 [4 2]
 [4 1]
 [

episode: 81   score: 54.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[1 2]
 [2 3]
 [4 2]
 [0 3]
 [4 0]
 [2 4]
 [3 2]
 [0 0]]  Selected action  [0 3]
episode: 81   score: 48.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[3 4]
 [1 3]
 [0 0]]  Selected action  [0 0]
episode: 81   score: 43.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[1 0]
 [4 2]
 [0 1]
 [1 4]
 [4 3]
 [3 2]
 [3 4]
 [0 0]]  Selected action  [0 1]
episode: 81   score: 56.0   memory length: 4096   epsilon: 0.777767679171789
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [0 3]
 [4 3]
 [0 1]
 [0 2]
 [4 1]
 [2 0]
 [1 4]
 [3 4]
 [2 4]
 [3 0]
 [2 3]
 [1 3]
 [3 1]
 [0 0]]  Selected action  [2 1]
episode: 81   score: 85.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[0 1]
 [0 3]
 [4 3]
 [3 2]
 [3 4]
 [1 0]
 [2 0]
 [0 0]]  Selec

episode: 81   score: 224.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[4 3]
 [4 0]
 [2 4]
 [0 4]
 [3 1]
 [4 1]
 [0 1]
 [1 2]
 [0 2]
 [0 0]]  Selected action  [3 1]
episode: 81   score: 213.0   memory length: 4096   epsilon: 0.777767679171789
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 0]
 [4 3]
 [3 4]
 [2 3]
 [1 3]
 [4 0]
 [0 1]
 [0 2]
 [2 0]
 [1 2]
 [1 0]
 [1 4]
 [4 1]
 [4 2]
 [0 0]]  Selected action  [3 0]
episode: 81   score: 220.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[2 3]
 [4 2]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 81   score: 224.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[4 1]
 [0 3]
 [4 2]
 [4 0]
 [3 2]
 [3 1]
 [0 0]]  Selected action  [4 2]
episode: 81   score: 204.0   memory length: 4096   epsilon: 0.777767679171789
Exploring Available actions  [[3 0]
 [4 3]
 [0 0]]  Selected action  [4 3

episode: 82   score: 50.0   memory length: 4096   epsilon: 0.7753280840582328
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[2 0]
 [0 0]]  Selected action  [2 0]
episode: 82   score: 70.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[1 0]
 [1 3]
 [2 0]
 [0 0]]  Selected action  [1 0]
episode: 82   score: 67.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[1 0]
 [0 0]]  Selected action  [0 0]
episode: 82   score: 62.0   memory length: 4096   epsilon: 0.7753280840582328
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [2 3]
 [0 0]]  Selected action  [0 1]
episode: 82   score: 90.0   memory length: 4096   epsilon: 0.7753280840582328
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [2 0]
 [3 1]
 [4 0]
 [3 2]
 [2 3]
 [1 2]
 [3 4]
 [1 0]
 [1 3]
 [0 1]
 [0 2]
 [1 4]
 [0 0]]  Selected action  [0 4]
episode: 82   score: 94.0   memory l

episode: 82   score: -104.0   memory length: 4096   epsilon: 0.7753280840582328
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 0]
 [0 3]
 [2 3]
 [1 2]
 [2 4]
 [4 2]
 [1 4]
 [4 3]
 [0 2]
 [4 1]
 [0 1]
 [0 4]
 [3 4]
 [1 0]
 [4 0]
 [0 0]]  Selected action  [3 0]
episode: 82   score: -117.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 0]
episode: 82   score: -122.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[0 3]
 [2 1]
 [0 2]
 [0 0]]  Selected action  [0 3]
episode: 82   score: -114.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[3 4]
 [2 1]
 [1 2]
 [4 2]
 [2 0]
 [3 1]
 [2 4]
 [0 4]
 [1 4]
 [0 0]]  Selected action  [1 2]
episode: 82   score: -115.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[2 3]
 [2 1]
 [0 3]
 [1 4]
 [1 3]
 [0 0]]  Selected action

episode: 82   score: -170.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[1 4]
 [0 2]
 [0 1]
 [0 0]]  Selected action  [1 4]
episode: 82   score: -170.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[0 3]
 [4 3]
 [4 0]
 [1 4]
 [3 1]
 [0 4]
 [3 4]
 [0 1]
 [4 1]
 [2 3]
 [3 2]
 [0 0]]  Selected action  [1 4]
episode: 82   score: -170.0   memory length: 4096   epsilon: 0.7753280840582328
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 0]
 [1 2]
 [1 0]
 [3 1]
 [3 0]
 [2 4]
 [4 3]
 [0 0]]  Selected action  [2 0]
episode: 82   score: -167.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 0]
episode: 82   score: -172.0   memory length: 4096   epsilon: 0.7753280840582328
Exploring Available actions  [[1 0]
 [0 3]
 [0 0]]  Selected action  [0 0]
episode: 82   score: -177.0   memory length: 4096   epsilon: 0.7753280840582328

episode: 83   score: -60.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[4 3]
 [0 4]
 [3 2]
 [4 0]
 [2 3]
 [0 0]]  Selected action  [2 3]
episode: 83   score: -44.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[4 3]
 [0 1]
 [4 1]
 [0 3]
 [3 4]
 [1 4]
 [4 2]
 [2 0]
 [1 3]
 [0 0]]  Selected action  [4 1]
episode: 83   score: -52.0   memory length: 4096   epsilon: 0.772896141132441
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [3 2]
 [2 4]
 [1 3]
 [0 1]
 [3 4]
 [1 4]
 [3 1]
 [0 0]]  Selected action  [0 2]
episode: 83   score: -53.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[1 2]
 [0 1]
 [2 3]
 [4 3]
 [0 0]]  Selected action  [0 0]
episode: 83   score: -58.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[0 3]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [0 3]
episode: 83   score: -48.0   memory length: 4096   

episode: 83   score: -254.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[0 1]
 [1 2]
 [1 0]
 [1 4]
 [4 0]
 [4 1]
 [0 4]
 [4 2]
 [0 0]]  Selected action  [0 1]
episode: 83   score: -254.0   memory length: 4096   epsilon: 0.772896141132441
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [0 4]
 [1 4]
 [4 2]
 [4 1]
 [0 2]
 [2 1]
 [1 2]
 [3 0]
 [2 0]
 [2 4]
 [3 4]
 [0 3]
 [2 3]
 [0 1]
 [0 0]]  Selected action  [3 2]
episode: 83   score: -232.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[0 2]
 [1 0]
 [4 1]
 [3 0]
 [3 1]
 [1 2]
 [0 0]]  Selected action  [0 2]
episode: 83   score: -229.0   memory length: 4096   epsilon: 0.772896141132441
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 3]
 [0 4]
 [0 0]]  Selected action  [0 3]
episode: 83   score: -259.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[4 1]
 [2

episode: 83   score: -229.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[3 2]
 [0 3]
 [4 2]
 [1 0]
 [4 3]
 [3 4]
 [4 0]
 [0 2]
 [2 0]
 [2 3]
 [1 4]
 [0 1]
 [0 0]]  Selected action  [0 1]
episode: 83   score: -233.0   memory length: 4096   epsilon: 0.772896141132441
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [0 2]
 [3 0]
 [3 1]
 [0 3]
 [1 2]
 [1 4]
 [3 4]
 [4 0]
 [1 3]
 [2 0]
 [3 2]
 [2 3]
 [2 4]
 [4 3]
 [0 0]]  Selected action  [0 1]
episode: 83   score: -245.0   memory length: 4096   epsilon: 0.772896141132441
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [1 4]
 [3 0]
 [4 2]
 [2 4]
 [2 1]
 [1 3]
 [2 3]
 [3 4]
 [1 0]
 [1 2]
 [0 4]
 [3 1]
 [4 3]
 [2 0]
 [0 0]]  Selected action  [3 2]
episode: 83   score: -238.0   memory length: 4096   epsilon: 0.772896141132441
Exploring Available actions  [[1 0]
 [0 2]
 [0 1]
 [1 4]
 [0 0]

episode: 84   score: -23.0   memory length: 4096   epsilon: 0.7704718263920792
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [3 0]
 [0 3]
 [0 0]]  Selected action  [4 2]
episode: 84   score: -32.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[1 3]
 [0 1]
 [4 1]
 [2 1]
 [3 4]
 [0 0]]  Selected action  [0 0]
episode: 84   score: -37.0   memory length: 4096   epsilon: 0.7704718263920792
(1, 21) [133.31769   0.        0.        0.        0.        0.     ] Index  0 133.31769
Exploiting Available actions  [[4 0]
 [0 1]
 [4 1]
 [1 2]
 [0 4]
 [0 0]]  Selected action  [4 0]
episode: 84   score: -53.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[3 1]
 [2 1]
 [3 0]
 [4 2]
 [4 3]
 [0 2]
 [0 0]]  Selected action  [2 1]
episode: 84   score: -36.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[4 0]
 [0 3]
 [1 4]
 [4 3]
 [2 0]
 [1 3]
 [1 0]
 [3 0]
 [0 1]
 [3 4]
 [2

episode: 84   score: -81.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[2 4]
 [0 0]]  Selected action  [2 4]
episode: 84   score: -88.0   memory length: 4096   epsilon: 0.7704718263920792
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 1]
 [2 0]
 [2 1]
 [2 3]
 [4 0]
 [3 4]
 [0 1]
 [0 0]]  Selected action  [3 1]
episode: 84   score: -96.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[0 2]
 [1 2]
 [2 0]
 [0 4]
 [3 2]
 [3 4]
 [4 2]
 [0 3]
 [1 4]
 [2 1]
 [0 0]]  Selected action  [1 2]
episode: 84   score: -60.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[1 0]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 84   score: -65.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[0 4]
 [1 4]
 [4 1]
 [0 0]]  Selected action  [0 4]
episode: 84   score: -96.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Av

episode: 84   score: -216.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[3 0]
 [3 2]
 [1 3]
 [1 2]
 [1 4]
 [0 0]]  Selected action  [1 4]
episode: 84   score: -238.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[0 4]
 [4 3]
 [4 2]
 [0 0]]  Selected action  [0 0]
episode: 84   score: -243.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[4 0]
 [0 3]
 [3 0]
 [2 1]
 [3 4]
 [3 1]
 [1 4]
 [0 0]]  Selected action  [3 0]
episode: 84   score: -269.0   memory length: 4096   epsilon: 0.7704718263920792
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [0 3]
 [2 0]
 [0 0]]  Selected action  [2 4]
episode: 84   score: -271.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Available actions  [[4 0]
 [3 1]
 [3 2]
 [0 2]
 [0 4]
 [0 0]]  Selected action  [0 4]
episode: 84   score: -287.0   memory length: 4096   epsilon: 0.7704718263920792
Exploring Availabl

episode: 85   score: 13.0   memory length: 4096   epsilon: 0.7680551159101003
Exploring Available actions  [[1 3]
 [2 3]
 [4 3]
 [0 1]
 [1 4]
 [2 0]
 [0 2]
 [3 0]
 [1 0]
 [0 0]]  Selected action  [0 2]
episode: 85   score: 10.0   memory length: 4096   epsilon: 0.7680551159101003
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [3 1]
 [2 1]
 [1 2]
 [0 0]]  Selected action  [4 2]
episode: 85   score: 10.0   memory length: 4096   epsilon: 0.7680551159101003
Exploring Available actions  [[3 1]
 [1 2]
 [2 1]
 [4 1]
 [0 1]
 [4 3]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [4 3]
episode: 85   score: 26.0   memory length: 4096   epsilon: 0.7680551159101003
Exploring Available actions  [[2 4]
 [3 1]
 [2 0]
 [2 3]
 [0 0]]  Selected action  [2 4]
episode: 85   score: -4.0   memory length: 4096   epsilon: 0.7680551159101003
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [3 0]
 [2 4]
 [2 0]
 [4 1]
 [4 0]
 [0 0]]  Selected action  [1 3]
epis

episode: 85   score: -163.0   memory length: 4096   epsilon: 0.7680551159101003
Exploring Available actions  [[1 3]
 [4 2]
 [2 4]
 [0 0]]  Selected action  [4 2]
episode: 85   score: -164.0   memory length: 4096   epsilon: 0.7680551159101003
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [1 3]
 [2 3]
 [3 0]
 [3 4]
 [0 0]]  Selected action  [2 1]
episode: 85   score: -160.0   memory length: 4096   epsilon: 0.7680551159101003
Exploring Available actions  [[4 0]
 [0 3]
 [3 2]
 [1 4]
 [3 1]
 [2 0]
 [1 2]
 [3 0]
 [2 3]
 [3 4]
 [4 2]
 [1 0]
 [4 1]
 [4 3]
 [1 3]
 [0 0]]  Selected action  [0 3]
episode: 85   score: -168.0   memory length: 4096   epsilon: 0.7680551159101003
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 4]
 [1 0]
 [1 3]
 [2 0]
 [4 0]
 [0 2]
 [2 4]
 [0 1]
 [0 0]]  Selected action  [0 4]
episode: 85   score: -179.0   memory length: 4096   epsilon: 0.7680551159101003
Exploring Available actions  [[2 0]
 [1 3]
 [2

episode: 86   score: -61.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[3 1]
 [2 4]
 [3 4]
 [0 0]]  Selected action  [2 4]
episode: 86   score: -76.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[0 3]
 [4 3]
 [3 1]
 [0 4]
 [1 2]
 [4 0]
 [3 2]
 [2 1]
 [2 0]
 [0 0]]  Selected action  [0 4]
episode: 86   score: -77.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[4 3]
 [0 1]
 [2 3]
 [2 1]
 [3 4]
 [1 4]
 [4 2]
 [1 3]
 [3 2]
 [4 0]
 [0 4]
 [1 0]
 [0 0]]  Selected action  [4 3]
episode: 86   score: -65.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[1 0]
 [4 2]
 [4 0]
 [0 0]]  Selected action  [4 2]
episode: 86   score: -72.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[4 3]
 [3 2]
 [4 2]
 [1 4]
 [3 0]
 [0 2]
 [0 0]]  Selected action  [1 4]
episode: 86   score: -85.0   memory length: 4096   epsilon: 0.7656459

episode: 86   score: -27.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[0 2]
 [0 0]]  Selected action  [0 0]
episode: 86   score: -32.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[2 0]
 [0 0]]  Selected action  [2 0]
episode: 86   score: -45.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[0 4]
 [0 0]]  Selected action  [0 0]
episode: 86   score: -50.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[1 0]
 [0 4]
 [1 3]
 [2 1]
 [0 0]]  Selected action  [0 0]
episode: 86   score: -55.0   memory length: 4096   epsilon: 0.7656459858345084
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[3 1]
 [0 0]]  Selected action  [3 1]
episode: 86   score: -86.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[0 1]
 [3 4]
 [1 2]
 [1 4]
 [1 3]
 [4 3]
 [2 3]
 [3 0]
 [3 2]
 [0 2]
 [1 0]
 [0 3]
 [4 2]
 [0 4]
 [0 0]] 

episode: 86   score: -165.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[4 2]
 [1 4]
 [2 4]
 [0 2]
 [1 2]
 [1 0]
 [4 3]
 [3 0]
 [2 1]
 [0 3]
 [0 0]]  Selected action  [2 4]
episode: 86   score: -167.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[3 1]
 [1 0]
 [1 2]
 [2 3]
 [4 3]
 [0 2]
 [2 0]
 [0 1]
 [0 0]]  Selected action  [0 1]
episode: 86   score: -147.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[4 1]
 [1 2]
 [2 0]
 [4 3]
 [2 4]
 [0 4]
 [1 0]
 [4 2]
 [3 1]
 [1 4]
 [2 3]
 [4 0]
 [0 3]
 [2 1]
 [3 4]
 [0 0]]  Selected action  [4 2]
episode: 86   score: -137.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[0 1]
 [0 0]]  Selected action  [0 1]
episode: 86   score: -131.0   memory length: 4096   epsilon: 0.7656459858345084
Exploring Available actions  [[4 1]
 [4 3]
 [3 2]
 [2 0]
 [2 3]
 [0 1]
 [1 4]
 [0 0]]  Selected action  [0 0]
episode:

episode: 87   score: -144.0   memory length: 4096   epsilon: 0.7632444123881229
Exploring Available actions  [[3 1]
 [4 0]
 [0 4]
 [0 0]]  Selected action  [3 1]
episode: 87   score: -159.0   memory length: 4096   epsilon: 0.7632444123881229
Exploring Available actions  [[1 2]
 [0 3]
 [3 2]
 [0 2]
 [4 1]
 [2 1]
 [4 2]
 [2 4]
 [3 0]
 [1 0]
 [1 4]
 [3 1]
 [4 0]
 [0 1]
 [0 0]]  Selected action  [0 1]
episode: 87   score: -161.0   memory length: 4096   epsilon: 0.7632444123881229
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [1 4]
 [3 4]
 [4 1]
 [0 3]
 [1 3]
 [1 0]
 [0 0]]  Selected action  [4 0]
episode: 87   score: -177.0   memory length: 4096   epsilon: 0.7632444123881229
Exploring Available actions  [[3 1]
 [0 1]
 [0 0]]  Selected action  [0 0]
episode: 87   score: -182.0   memory length: 4096   epsilon: 0.7632444123881229
Exploring Available actions  [[0 3]
 [0 0]]  Selected action  [0 0]
episode: 87   score: -187.0   memory length: 4096   epsilo

episode: 87   score: -80.0   memory length: 4096   epsilon: 0.7632444123881229
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [0 2]
 [2 1]
 [4 0]
 [0 0]]  Selected action  [4 3]
episode: 87   score: -72.0   memory length: 4096   epsilon: 0.7632444123881229
Exploring Available actions  [[2 4]
 [0 3]
 [0 0]]  Selected action  [0 3]
episode: 87   score: -90.0   memory length: 4096   epsilon: 0.7632444123881229
Exploring Available actions  [[1 3]
 [4 2]
 [2 0]
 [0 1]
 [0 4]
 [0 2]
 [2 4]
 [4 0]
 [1 0]
 [0 0]]  Selected action  [4 2]
episode: 87   score: -105.0   memory length: 4096   epsilon: 0.7632444123881229
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [1 4]
 [0 3]
 [0 0]]  Selected action  [1 3]
episode: 87   score: -115.0   memory length: 4096   epsilon: 0.7632444123881229
Exploring Available actions  [[1 3]
 [3 2]
 [0 1]
 [0 0]]  Selected action  [1 3]
episode: 87   score: -115.0   memory length: 4096   epsilon: 0.7632444123881

episode: 88   score: 63.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[3 0]
 [0 2]
 [1 4]
 [0 1]
 [3 2]
 [4 1]
 [0 4]
 [4 3]
 [1 0]
 [2 1]
 [1 2]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 88   score: 43.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[0 1]
 [1 4]
 [3 2]
 [1 3]
 [4 2]
 [3 4]
 [1 0]
 [2 3]
 [0 3]
 [0 2]
 [0 0]]  Selected action  [0 2]
episode: 88   score: 25.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[1 2]
 [3 4]
 [1 4]
 [0 0]]  Selected action  [0 0]
episode: 88   score: 20.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[4 1]
 [3 4]
 [0 3]
 [1 3]
 [0 4]
 [1 4]
 [0 0]]  Selected action  [4 1]
episode: 88   score: 39.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[1 3]
 [0 1]
 [4 3]
 [4 1]
 [3 1]
 [2 1]
 [3 0]
 [0 0]]  Selected action  [1 3]
episode: 88   score: 43.0   memory leng

episode: 88   score: 12.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[4 2]
 [0 0]]  Selected action  [0 0]
episode: 88   score: 7.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[0 2]
 [4 2]
 [3 1]
 [3 2]
 [0 0]]  Selected action  [3 2]
episode: 88   score: 28.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[0 4]
 [0 0]]  Selected action  [0 4]
episode: 88   score: 2.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[3 0]
 [4 0]
 [4 3]
 [2 3]
 [3 4]
 [1 2]
 [0 3]
 [3 1]
 [0 2]
 [1 3]
 [0 0]]  Selected action  [3 1]
episode: 88   score: -13.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[1 3]
 [0 4]
 [1 2]
 [1 4]
 [0 0]]  Selected action  [0 0]
episode: 88   score: -18.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[0 3]
 [2 3]
 [1 0]
 [2 0]
 [1 2]
 [4 0]
 [0 4]
 [2 4]
 [

episode: 88   score: -72.0   memory length: 4096   epsilon: 0.7608503718683446
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [3 0]
 [3 4]
 [3 2]
 [0 4]
 [0 0]]  Selected action  [4 2]
episode: 88   score: -92.0   memory length: 4096   epsilon: 0.7608503718683446
Exploring Available actions  [[0 2]
 [1 4]
 [3 2]
 [0 1]
 [0 0]]  Selected action  [0 0]
episode: 88   score: -97.0   memory length: 4096   epsilon: 0.7608503718683446
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 1]
 [4 3]
 [4 0]
 [1 3]
 [3 2]
 [1 4]
 [0 0]]  Selected action  [4 1]
episode: 88   score: -89.0   memory length: 4096   epsilon: 0.7608503718683446
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 2]
 [4 2]
 [0 2]
 [4 0]
 [2 4]
 [0 3]
 [3 0]
 [2 3]
 [2 0]
 [3 4]
 [3 1]
 [4 1]
 [1 4]
 [1 3]
 [0 0]]  Selected action  [3 2]
episode: 88   score: -57.0   memory length: 4096   epsilon: 0.7608503718683446
Ex

episode: 89   score: 15.0   memory length: 4096   epsilon: 0.7584638406469213
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 4]
 [3 0]
 [4 0]
 [2 1]
 [1 3]
 [0 0]]  Selected action  [2 4]
episode: 89   score: 15.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[1 3]
 [2 1]
 [3 1]
 [4 0]
 [0 1]
 [0 0]]  Selected action  [1 3]
episode: 89   score: 23.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[4 1]
 [0 4]
 [4 3]
 [2 1]
 [1 4]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 89   score: 13.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[4 2]
 [3 0]
 [2 3]
 [1 3]
 [2 0]
 [1 2]
 [1 4]
 [2 1]
 [0 1]
 [3 2]
 [0 0]]  Selected action  [1 2]
episode: 89   score: 21.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[0 3]
 [0 2]
 [0 0]]  Selected action  [0 0]
episode: 89   score: 16.0   memory length: 4096   epsilon: 0.758463

episode: 89   score: -7.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[3 4]
 [4 2]
 [2 0]
 [4 1]
 [4 0]
 [3 0]
 [1 4]
 [2 4]
 [0 0]]  Selected action  [2 0]
episode: 89   score: 15.0   memory length: 4096   epsilon: 0.7584638406469213
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 1]
 [1 4]
 [3 0]
 [0 0]]  Selected action  [3 1]
episode: 89   score: 1.0   memory length: 4096   epsilon: 0.7584638406469213
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 3]
 [4 0]
 [2 3]
 [4 1]
 [3 1]
 [2 0]
 [0 4]
 [3 2]
 [2 4]
 [1 4]
 [0 2]
 [3 0]
 [0 3]
 [1 3]
 [0 0]]  Selected action  [4 3]
episode: 89   score: 25.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[3 4]
 [2 1]
 [3 2]
 [0 1]
 [2 0]
 [4 3]
 [0 2]
 [4 1]
 [1 4]
 [0 0]]  Selected action  [4 1]
episode: 89   score: 7.0   memory length: 4096   epsilon: 0.7584638406469213
(1, 21) [0. 0. 0. 0. 0. 0

episode: 89   score: 30.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[4 3]
 [2 3]
 [0 4]
 [1 4]
 [4 0]
 [2 0]
 [4 1]
 [3 1]
 [0 0]]  Selected action  [3 1]
episode: 89   score: 45.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[3 2]
 [3 1]
 [1 3]
 [2 1]
 [4 0]
 [4 3]
 [3 0]
 [0 0]]  Selected action  [0 0]
episode: 89   score: 40.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[2 0]
 [2 1]
 [3 4]
 [2 3]
 [4 0]
 [4 1]
 [1 4]
 [2 4]
 [0 3]
 [3 1]
 [4 3]
 [0 1]
 [0 0]]  Selected action  [2 0]
episode: 89   score: 44.0   memory length: 4096   epsilon: 0.7584638406469213
Exploring Available actions  [[1 3]
 [0 0]]  Selected action  [1 3]
episode: 89   score: 18.0   memory length: 4096   epsilon: 0.7584638406469213
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [4 1]
 [2 1]
 [1 3]
 [3 1]
 [2 3]
 [0 0]]  Selected action  [0 1]
episode: 89   score: 2

episode: 90   score: -229.0   memory length: 4096   epsilon: 0.7560847951697145
Exploring Available actions  [[3 2]
 [1 3]
 [0 4]
 [2 3]
 [0 1]
 [0 0]]  Selected action  [1 3]
episode: 90   score: -241.0   memory length: 4096   epsilon: 0.7560847951697145
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 4]
 [1 2]
 [1 0]
 [1 3]
 [2 0]
 [0 0]]  Selected action  [1 4]
episode: 90   score: -243.0   memory length: 4096   epsilon: 0.7560847951697145
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [3 1]
 [1 3]
 [3 0]
 [1 4]
 [0 4]
 [2 3]
 [0 2]
 [2 1]
 [0 0]]  Selected action  [4 0]
episode: 90   score: -239.0   memory length: 4096   epsilon: 0.7560847951697145
(1, 21) [0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 3]
 [1 4]
 [0 1]
 [3 4]
 [0 0]]  Selected action  [2 3]
episode: 90   score: -250.0   memory length: 4096   epsilon: 0.7560847951697145
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploit

episode: 90   score: -166.0   memory length: 4096   epsilon: 0.7560847951697145
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [2 3]
 [1 0]
 [0 0]]  Selected action  [0 1]
episode: 90   score: -170.0   memory length: 4096   epsilon: 0.7560847951697145
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 2]
 [3 0]
 [3 1]
 [3 2]
 [1 3]
 [0 2]
 [0 4]
 [1 2]
 [1 0]
 [2 4]
 [0 1]
 [0 3]
 [2 0]
 [3 4]
 [0 0]]  Selected action  [4 2]
episode: 90   score: -180.0   memory length: 4096   epsilon: 0.7560847951697145
Exploring Available actions  [[3 2]
 [4 3]
 [2 0]
 [4 1]
 [0 0]]  Selected action  [4 1]
episode: 90   score: -172.0   memory length: 4096   epsilon: 0.7560847951697145
Exploring Available actions  [[3 4]
 [2 4]
 [4 1]
 [3 1]
 [0 2]
 [0 3]
 [2 0]
 [1 2]
 [4 3]
 [3 2]
 [0 1]
 [0 0]]  Selected action  [3 1]
episode: 90   score: -176.0   memory length: 4096   epsilon: 0.7560847951697145
Exploring Available actions

episode: 91   score: 18.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[3 1]
 [0 4]
 [4 1]
 [2 3]
 [3 0]
 [2 4]
 [3 2]
 [1 3]
 [0 0]]  Selected action  [4 1]
episode: 91   score: 34.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[4 3]
 [2 3]
 [0 4]
 [0 2]
 [0 1]
 [1 0]
 [0 3]
 [1 4]
 [3 4]
 [1 3]
 [4 2]
 [3 0]
 [4 0]
 [1 2]
 [4 1]
 [0 0]]  Selected action  [1 4]
episode: 91   score: 46.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[3 0]
 [0 1]
 [2 3]
 [1 0]
 [4 3]
 [3 4]
 [0 4]
 [0 0]]  Selected action  [0 4]
episode: 91   score: 53.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[1 4]
 [0 1]
 [0 2]
 [1 3]
 [4 0]
 [3 4]
 [2 0]
 [2 4]
 [0 0]]  Selected action  [1 3]
episode: 91   score: 57.0   memory length: 4096   epsilon: 0.7537132119564671
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[3 4]
 [0 1]
 [0 3]
 [4 1

episode: 91   score: -13.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[4 0]
 [4 1]
 [0 2]
 [0 4]
 [4 2]
 [0 3]
 [0 0]]  Selected action  [4 2]
episode: 91   score: -33.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[4 2]
 [2 4]
 [0 0]]  Selected action  [2 4]
episode: 91   score: -33.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[4 3]
 [2 0]
 [2 3]
 [0 4]
 [0 0]]  Selected action  [0 4]
episode: 91   score: -34.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[0 3]
 [1 2]
 [4 2]
 [0 4]
 [0 2]
 [2 0]
 [0 0]]  Selected action  [0 4]
episode: 91   score: -35.0   memory length: 4096   epsilon: 0.7537132119564671
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [3 2]
 [2 1]
 [1 3]
 [3 4]
 [4 1]
 [0 4]
 [1 0]
 [3 1]
 [1 4]
 [0 0]]  Selected action  [0 1]
episode: 91   score: -15.0   memory length: 409

episode: 91   score: -354.0   memory length: 4096   epsilon: 0.7537132119564671
(1, 21) [0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[4 0]
 [1 3]
 [2 0]
 [3 1]
 [3 4]
 [1 4]
 [0 0]]  Selected action  [4 0]
episode: 91   score: -359.0   memory length: 4096   epsilon: 0.7537132119564671
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 0]
 [0 3]
 [0 0]]  Selected action  [2 0]
episode: 91   score: -361.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[4 3]
 [0 0]]  Selected action  [4 3]
episode: 91   score: -379.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[2 0]
 [1 0]
 [1 2]
 [0 0]]  Selected action  [1 0]
episode: 91   score: -372.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[3 0]
 [1 3]
 [0 4]
 [0 0]]  Selected action  [3 0]
episode: 91   score: -383.0   memory length: 4096   epsilon: 0.7537132119564671
Exploring Available actions  [[2

episode: 92   score: -22.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[0 3]
 [0 2]
 [4 3]
 [4 0]
 [1 3]
 [1 4]
 [2 0]
 [4 1]
 [4 2]
 [0 0]]  Selected action  [4 3]
episode: 92   score: -2.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[2 0]
 [0 4]
 [4 0]
 [2 3]
 [0 0]]  Selected action  [2 0]
episode: 92   score: -15.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[1 0]
 [0 0]]  Selected action  [1 0]
episode: 92   score: -38.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[0 2]
 [0 1]
 [0 3]
 [0 0]]  Selected action  [0 1]
episode: 92   score: -26.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[2 4]
 [3 4]
 [0 2]
 [2 0]
 [0 3]
 [4 3]
 [1 2]
 [3 2]
 [3 0]
 [4 2]
 [0 4]
 [0 0]]  Selected action  [3 0]
episode: 92   score: -19.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available action

episode: 92   score: -189.0   memory length: 4096   epsilon: 0.7513490676005719
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 1]
 [0 4]
 [0 0]]  Selected action  [2 1]
episode: 92   score: -149.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[3 0]
 [1 3]
 [4 3]
 [3 2]
 [2 0]
 [3 1]
 [3 4]
 [1 4]
 [0 1]
 [0 0]]  Selected action  [3 4]
episode: 92   score: -153.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[2 3]
 [0 1]
 [1 2]
 [3 2]
 [1 4]
 [3 1]
 [0 4]
 [2 1]
 [2 4]
 [0 0]]  Selected action  [0 0]
episode: 92   score: -158.0   memory length: 4096   epsilon: 0.7513490676005719
Exploring Available actions  [[0 2]
 [4 3]
 [0 4]
 [2 0]
 [1 4]
 [2 1]
 [4 2]
 [3 1]
 [3 2]
 [0 0]]  Selected action  [2 1]
episode: 92   score: -150.0   memory length: 4096   epsilon: 0.7513490676005719
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 3]
 [2 0]
 [4 1]
 [3 4]
 [0 2]

episode: 93   score: -6.0   memory length: 4096   epsilon: 0.74899233876884
Exploring Available actions  [[0 4]
 [1 0]
 [1 3]
 [3 1]
 [2 1]
 [0 0]]  Selected action  [1 0]
episode: 93   score: -18.0   memory length: 4096   epsilon: 0.74899233876884
(1, 21) [0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 2]
 [0 1]
 [0 0]]  Selected action  [0 2]
episode: 93   score: -6.0   memory length: 4096   epsilon: 0.74899233876884
(1, 21) [0. 0. 0. 0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[2 3]
 [1 3]
 [3 2]
 [2 1]
 [0 3]
 [0 2]
 [4 3]
 [0 4]
 [0 0]]  Selected action  [2 3]
episode: 93   score: 26.0   memory length: 4096   epsilon: 0.74899233876884
(1, 21) [0. 0. 0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[0 1]
 [0 3]
 [1 2]
 [0 4]
 [3 4]
 [0 0]]  Selected action  [0 1]
episode: 93   score: 12.0   memory length: 4096   epsilon: 0.74899233876884
Exploring Available actions  [[0 2]
 [2 0]
 [1 2]
 [3 1]
 [1 0]
 [1 4]
 [0 3]
 [3 4]
 [1 3]
 [2 4]
 [0 0]]  S

episode: 93   score: 120.0   memory length: 4096   epsilon: 0.74899233876884
Exploring Available actions  [[0 1]
 [2 4]
 [3 0]
 [1 4]
 [3 4]
 [2 3]
 [1 3]
 [1 2]
 [4 1]
 [0 2]
 [3 1]
 [0 0]]  Selected action  [3 0]
episode: 93   score: 109.0   memory length: 4096   epsilon: 0.74899233876884
(1, 21) [0. 0. 0. 0.] Index  0 0.0
Exploiting Available actions  [[1 4]
 [3 2]
 [3 4]
 [0 0]]  Selected action  [1 4]
episode: 93   score: 78.0   memory length: 4096   epsilon: 0.74899233876884
Exploring Available actions  [[0 3]
 [2 0]
 [4 0]
 [0 1]
 [1 2]
 [4 2]
 [1 3]
 [0 0]]  Selected action  [4 2]
episode: 93   score: 110.0   memory length: 4096   epsilon: 0.74899233876884
(1, 21) [0. 0.] Index  0 0.0
Exploiting Available actions  [[3 0]
 [0 0]]  Selected action  [3 0]
episode: 93   score: 82.0   memory length: 4096   epsilon: 0.74899233876884
Exploring Available actions  [[3 0]
 [0 0]]  Selected action  [0 0]
episode: 93   score: 77.0   memory length: 4096   epsilon: 0.74899233876884
(1, 21) [

In [None]:
# make directory
if not os.path.exists("saved_pickle_files"):
    os.mkdir("saved_pickle_files")

# save rewards_per_episode
save_pickle(rewards_per_episode, "saved_pickle_files/rewards_per_episode")


# plot results
with open('saved_pickle_files/rewards_per_episode.pkl', 'rb') as f:
    rewards_per_episode = pickle.load(f)

plt.plot(list(range(len(rewards_per_episode))), rewards_per_episode)
plt.xlabel("episode number")
plt.ylabel("reward per episode")

# save plots in saved_plots/ directory
plt.savefig('rewards.png')

print("Average reward of last 100 episodes is {0}".format(np.mean(rewards_per_episode[-100:])))

## Tracking convergence

In [None]:
j = 0
tracked_state_action = list(states_to_be_tracked.keys())
for i in range(1, 10):
    plt.figure(j, figsize=(16, 7))
    state_action = tracked_state_action[i]
    if not len(states_to_be_tracked[state_action]):
        continue
    xaxis = np.asarray(range(0, len(states_to_be_tracked[state_action])))
    plt.title('state={0} action={1}'.format(state_action[0], state_action[1]))
    plt.plot(xaxis,np.asarray(states_to_be_tracked[state_action]))
    j = (j + 1) % 9
    plt.show()

#### Epsilon-decay sample function

<div class="alert alert-block alert-info">
Try building a similar epsilon-decay function for your model.
</div>

In [None]:
total_steps = 50
time = np.arange(0, total_steps)
epsilon = []
x = 0.99
for step in range(0,total_steps):
    x = abs(np.exp((-np.pi/total_steps)*step))
    epsilon.append(x)

In [None]:
plt.plot(time, epsilon)
plt.show()