In [None]:
import gym
import numpy as np
import random
import math
from time import sleep


## Initialize the "Cart-Pole" environment
# Created once at module level; simulate() and select_action() use this
# shared instance directly.
env = gym.make('CartPole-v0')

## Defining the environment related constants

# Number of discrete states (bucket) per state dimension.
# A dimension with a single bucket always maps to index 0 in
# state_to_bucket(), so it contributes nothing to the discretized state.
NUM_BUCKETS = (1, 1, 6, 3)  # (x, x', theta, theta')
# Number of discrete actions
NUM_ACTIONS = env.action_space.n # (left, right)
# Bounds for each discrete state, as (low, high) pairs taken from the
# environment's observation space.
STATE_BOUNDS = list(zip(env.observation_space.low, env.observation_space.high))
# Override the bounds of dimensions 1 (x') and 3 (theta') with fixed ranges.
# NOTE(review): presumably the environment reports very large/unbounded
# limits for the two velocities -- confirm against the Gym documentation.
STATE_BOUNDS[1] = [-0.5, 0.5]
STATE_BOUNDS[3] = [-math.radians(50), math.radians(50)]
# Index of the action axis in the Q-table (the axis right after the state
# axes). Not referenced by the code visible in this file.
ACTION_INDEX = len(NUM_BUCKETS)

## Creating a Q-Table for each state-action pair
# Shape is NUM_BUCKETS + (NUM_ACTIONS,); all entries start at zero.
q_table = np.zeros(NUM_BUCKETS + (NUM_ACTIONS,))

## Learning related constants
# Lower limits applied by get_explore_rate() and get_learning_rate().
MIN_EXPLORE_RATE = 0.01
MIN_LEARNING_RATE = 0.1

## Defining the simulation related constants
NUM_EPISODES = 1000   # maximum number of episodes simulate() runs
MAX_T = 250           # maximum number of steps per episode
STREAK_TO_END = 120   # simulate() stops once the solved streak exceeds this
SOLVED_T = 199        # an episode is "solved" if it ends at step index >= this
DEBUG_MODE = True     # print per-step diagnostics inside simulate()

def simulate():
    """Train the tabular Q-learning agent on the module-level ``env``.

    Runs at most ``NUM_EPISODES`` episodes of at most ``MAX_T`` steps each.
    On every step an action is chosen with ``select_action``, executed in the
    environment, and the module-level ``q_table`` is updated in place with
    the one-step Q-learning rule.  An episode that ends at a step index of
    ``SOLVED_T`` or later extends the solved streak; any earlier ending
    resets it.  Training stops early once the streak exceeds
    ``STREAK_TO_END``.

    The exploration and learning rates are recomputed from the episode
    number after every episode.

    Returns:
        None.  The learned values are left in ``q_table``.
    """

    ## Instantiating the learning related parameters
    learning_rate = get_learning_rate(0)
    explore_rate = get_explore_rate(0)
    discount_factor = 0.99  # since the world is unchanging

    num_streaks = 0

    for episode in range(NUM_EPISODES):

        # Reset the environment
        obv = env.reset()

        # the initial state
        state_0 = state_to_bucket(obv)

        for t in range(MAX_T):
            env.render()

            # Select an action
            action = select_action(state_0, explore_rate)

            # Execute the action
            obv, reward, done, _ = env.step(action)

            # Observe the result
            state = state_to_bucket(obv)

            # Update the Q based on the result.
            # NOTE(review): the bootstrap term (best_q) is also applied on
            # the step where `done` is True -- confirm this is intended.
            best_q = np.amax(q_table[state])
            q_table[state_0 + (action,)] += learning_rate*(reward + discount_factor*(best_q) - q_table[state_0 + (action,)])

            # Setting up for the next iteration
            state_0 = state

            # Print data
            if DEBUG_MODE:
                _print_step_debug(episode, t, action, state, reward, best_q,
                                  explore_rate, learning_rate, num_streaks)

            if done:
                # t is a zero-based step index, so the episode lasted t + 1
                # steps; report it as an integer count.
                print("Episode %d finished after %d time steps" % (episode, t + 1))
                if t >= SOLVED_T:
                    num_streaks += 1
                else:
                    num_streaks = 0
                break

            # Uncomment to slow the rendering down:
            #sleep(0.25)

        # Training is finished once the solved streak exceeds STREAK_TO_END
        if num_streaks > STREAK_TO_END:
            break

        # Update parameters
        explore_rate = get_explore_rate(episode)
        learning_rate = get_learning_rate(episode)


def _print_step_debug(episode, t, action, state, reward, best_q,
                      explore_rate, learning_rate, num_streaks):
    """Print the per-step diagnostics emitted when DEBUG_MODE is enabled."""
    print("\nEpisode = %d" % episode)
    print("t = %d" % t)
    print("Action: %d" % action)
    print("State: %s" % str(state))
    print("Reward: %f" % reward)
    print("Best Q: %f" % best_q)
    print("Explore rate: %f" % explore_rate)
    print("Learning rate: %f" % learning_rate)
    print("Streaks: %d" % num_streaks)

    print("")


def select_action(state, explore_rate):
    """Pick an action for ``state`` using an epsilon-greedy rule.

    Args:
        state: Tuple of bucket indices used to index ``q_table``.
        explore_rate: Probability threshold for taking a random action.

    Returns:
        A sample from ``env.action_space`` when a fresh ``random.random()``
        draw is below ``explore_rate``; otherwise the index of the largest
        Q-value stored for ``state``.
    """
    exploring = random.random() < explore_rate
    if not exploring:
        # Exploit: take the action with the highest q
        return np.argmax(q_table[state])
    # Explore: take a random action
    return env.action_space.sample()


def get_explore_rate(t):
    """Return the exploration rate for index ``t``.

    The rate is ``1.0 - log10((t + 1) / 25)``, capped at 1 from above and
    at ``MIN_EXPLORE_RATE`` from below.
    """
    decayed = 1.0 - math.log10((t+1)/25)
    capped = min(1, decayed)
    return max(MIN_EXPLORE_RATE, capped)

def get_learning_rate(t):
    """Return the learning rate for index ``t``.

    The rate is ``1.0 - log10((t + 1) / 25)``, capped at 0.5 from above and
    at ``MIN_LEARNING_RATE`` from below.
    """
    decayed = 1.0 - math.log10((t+1)/25)
    capped = min(0.5, decayed)
    return max(MIN_LEARNING_RATE, capped)

def state_to_bucket(state):
    """Convert a continuous observation into a tuple of bucket indices.

    For each dimension, a value at or below the lower bound in
    ``STATE_BOUNDS`` maps to bucket 0 and a value at or above the upper
    bound maps to the last bucket.  Values in between are mapped linearly
    onto ``0 .. NUM_BUCKETS[dim] - 1`` and rounded to the nearest index.

    Args:
        state: Indexable sequence of observation values, one per dimension.

    Returns:
        Tuple of integer bucket indices, one per dimension of ``state``.
    """
    indices = []
    for dim, value in enumerate(state):
        bounds = STATE_BOUNDS[dim]
        low = bounds[0]
        high = bounds[1]

        if value <= low:
            indices.append(0)
            continue
        if value >= high:
            indices.append(NUM_BUCKETS[dim] - 1)
            continue

        # Linear map from [low, high] onto [0, NUM_BUCKETS[dim] - 1]
        span = high - low
        shift = (NUM_BUCKETS[dim]-1)*low/span
        slope = (NUM_BUCKETS[dim]-1)/span
        indices.append(int(round(slope*value - shift)))
    return tuple(indices)

# Start training only when this file is executed directly, not on import.
if __name__ == "__main__":
    simulate()


Episode = 0
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 0.000000
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 0.500000
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 0.747500
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 1.243763
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 1.365662
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 0.000000
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 0.000000
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episo



Episode = 3
t = 6
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 4.705902
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 7
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 5.090894
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 5.565440
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 5.607844
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 10
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 6.079805
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 11
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 0.997500
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 12
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 0.997500
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


E


Episode = 7
t = 3
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 7.745825
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 8.070908
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 5
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 8.368012
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 8.677620
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 7.560150
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 7.560150
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 8.022349
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episo


Episode = 9
t = 16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 11.795986
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 11.960758
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 18
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 12.400954
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 12.536465
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 20
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 12.906028
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 13.156716
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 22
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 13.465588
Explore rate: 1.000000
Learning rate: 0.500000
Stre


Episode = 11
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 12.336208
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 11
t = 19
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 13.524484
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 11
t = 20
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 13.524484
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 11
t = 21
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 13.956862
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 11
t = 22
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 2.166061
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 11
t = 23
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 2.166061
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 11
t = 24
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 2.501295
Explore rate: 1.000000
Learning rate: 0.500000


Best Q: 10.742584
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 14
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 14.522840
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 14
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 15.195318
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 14
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 15.264886
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 14
t = 7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 15.264886
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 14
t = 8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 15.317539
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 14
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 15.653778
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 14
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 

Streaks: 0


Episode = 17
t = 3
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 18.224509
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 17
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 18.499533
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 17
t = 5
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 18.769523
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 17
t = 6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 19.040680
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 17
t = 7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 17.403661
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 17
t = 8
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 17.403661
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 17
t = 9
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 17.816643
Explore rate: 1.000000
Learning rate: 0


Episode = 20
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 16.251733
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 20
t = 7
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 16.670474
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 20
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 16.712751
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 20
t = 9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 12.735016
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 20
t = 10
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 12.735016
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 20
t = 11
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 12.735016
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 20
t = 12
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 19.543521
Explore rate: 1.000000
Learning rate: 0.500000
S

Best Q: 6.337809
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 21
t = 46
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 6.596492
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 21
t = 47
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 6.596492
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0

Episode 21 finished after 47.000000 time steps

Episode = 22
t = 0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 21.761673
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22
t = 1
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 21.814762
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22
t = 2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 22.374739
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22
t = 3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 22.374739
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22


Episode = 24
t = 23
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 23.073163
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 24
t = 24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 23.428211
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 24
t = 25
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 23.633546
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 24
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 24.015378
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 24
t = 27
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 20.857721
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 24
t = 28
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 20.857721
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 24
t = 29
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 20.857721
Explore rate: 1.000000
Learning rate: 0.5000

t = 1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 21.151700
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 28
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 23.961777
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 28
t = 3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 23.961777
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 28
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 24.341968
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 28
t = 5
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 24.341968
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 28
t = 6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 24.720258
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 28
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 21.151700
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episo


Episode = 31
t = 15
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 9.180931
Explore rate: 0.906578
Learning rate: 0.500000
Streaks: 0

Episode 31 finished after 15.000000 time steps

Episode = 32
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 24.720258
Explore rate: 0.892790
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 24.448933
Explore rate: 0.892790
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 25.150550
Explore rate: 0.892790
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 28.402076
Explore rate: 0.892790
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 28.402076
Explore rate: 0.892790
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 5
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 28.402076
Expl


Episode = 34
t = 1
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 31.682155
Explore rate: 0.866461
Learning rate: 0.500000
Streaks: 0


Episode = 34
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 31.682155
Explore rate: 0.866461
Learning rate: 0.500000
Streaks: 0


Episode = 34
t = 3
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.626450
Explore rate: 0.866461
Learning rate: 0.500000
Streaks: 0


Episode = 34
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 31.176965
Explore rate: 0.866461
Learning rate: 0.500000
Streaks: 0


Episode = 34
t = 5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.626450
Explore rate: 0.866461
Learning rate: 0.500000
Streaks: 0


Episode = 34
t = 6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.626450
Explore rate: 0.866461
Learning rate: 0.500000
Streaks: 0


Episode = 34
t = 7
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.978318
Explore rate: 0.866461
Learning rate: 0.500000
Stre


Episode = 37
t = 13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 29.829576
Explore rate: 0.829738
Learning rate: 0.500000
Streaks: 0


Episode = 37
t = 14
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 30.016448
Explore rate: 0.829738
Learning rate: 0.500000
Streaks: 0


Episode = 37
t = 15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 30.411583
Explore rate: 0.829738
Learning rate: 0.500000
Streaks: 0


Episode = 37
t = 16
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 9.099812
Explore rate: 0.829738
Learning rate: 0.500000
Streaks: 0


Episode = 37
t = 17
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 9.099812
Explore rate: 0.829738
Learning rate: 0.500000
Streaks: 0


Episode = 37
t = 18
Action: 1
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 9.554313
Explore rate: 0.829738
Learning rate: 0.500000
Streaks: 0


Episode = 37
t = 19
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 10.086851
Explore rate: 0.829738
Learning rate: 0.500000


Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 25.050340
Explore rate: 0.795880
Learning rate: 0.500000
Streaks: 0


Episode = 40
t = 9
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 25.050340
Explore rate: 0.795880
Learning rate: 0.500000
Streaks: 0


Episode = 40
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 25.050340
Explore rate: 0.795880
Learning rate: 0.500000
Streaks: 0


Episode = 40
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 25.425089
Explore rate: 0.795880
Learning rate: 0.500000
Streaks: 0


Episode = 40
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 25.797963
Explore rate: 0.795880
Learning rate: 0.500000
Streaks: 0


Episode = 40
t = 13
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 10.495555
Explore rate: 0.795880
Learning rate: 0.500000
Streaks: 0


Episode = 40
t = 14
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 10.495555
Explore rate: 0.795880
Learning rate: 0.500000
Streaks: 0

Episode

State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 23.194396
Explore rate: 0.754487
Learning rate: 0.500000
Streaks: 0


Episode = 44
t = 8
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 14.320147
Explore rate: 0.754487
Learning rate: 0.500000
Streaks: 0


Episode = 44
t = 9
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 14.320147
Explore rate: 0.754487
Learning rate: 0.500000
Streaks: 0

Episode 44 finished after 9.000000 time steps

Episode = 45
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 34.065571
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0


Episode = 45
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 22.041675
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0


Episode = 45
t = 2
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 22.041675
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0


Episode = 45
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 23.578424
Explore rate: 0.744727
Learning rat

Streaks: 0


Episode = 48
t = 12
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 33.035672
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 13
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 33.370494
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 14
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 12.331349
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 15
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 12.331349
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 16
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 12.769693
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0

Episode 48 finished after 16.000000 time steps

Episode = 49
t = 0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 29.279928
Explore rate: 0.707744
Learning rate: 0.500000
Streaks: 0


Episode = 49
t = 1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best 

State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 31.274595
Explore rate: 0.698970
Learning rate: 0.500000
Streaks: 0


Episode = 50
t = 39
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 31.274595
Explore rate: 0.698970
Learning rate: 0.500000
Streaks: 0


Episode = 50
t = 40
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 34.289757
Explore rate: 0.698970
Learning rate: 0.500000
Streaks: 0


Episode = 50
t = 41
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 33.282541
Explore rate: 0.698970
Learning rate: 0.500000
Streaks: 0


Episode = 50
t = 42
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 34.289757
Explore rate: 0.698970
Learning rate: 0.500000
Streaks: 0


Episode = 50
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 34.114700
Explore rate: 0.698970
Learning rate: 0.500000
Streaks: 0


Episode = 50
t = 44
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 34.114700
Explore rate: 0.698970
Learning rate: 0.500000
Streaks: 0


Episode = 50
t 


Episode = 53
t = 7
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 35.760938
Explore rate: 0.673664
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 35.760938
Explore rate: 0.673664
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 9
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 36.082133
Explore rate: 0.673664
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 36.082133
Explore rate: 0.673664
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 36.401723
Explore rate: 0.673664
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 12
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 27.096203
Explore rate: 0.673664
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 13
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 35.446382
Explore rate: 0.673664
Learning rate: 0.500000



Episode = 55
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 34.877969
Explore rate: 0.657577
Learning rate: 0.500000
Streaks: 0


Episode = 55
t = 13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.965127
Explore rate: 0.657577
Learning rate: 0.500000
Streaks: 0


Episode = 55
t = 14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.965127
Explore rate: 0.657577
Learning rate: 0.500000
Streaks: 0


Episode = 55
t = 15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 38.275302
Explore rate: 0.657577
Learning rate: 0.500000
Streaks: 0


Episode = 55
t = 16
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 36.136972
Explore rate: 0.657577
Learning rate: 0.500000
Streaks: 0


Episode = 55
t = 17
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.679764
Explore rate: 0.657577
Learning rate: 0.500000
Streaks: 0


Episode = 55
t = 18
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 37.219969
Explore rate: 0.657577
Learning rate: 0.5000


Episode = 56
t = 14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 39.681572
Explore rate: 0.649752
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 39.681572
Explore rate: 0.649752
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 16
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 39.681572
Explore rate: 0.649752
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 17
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 39.983164
Explore rate: 0.649752
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 18
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 40.107123
Explore rate: 0.649752
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 19
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 40.107123
Explore rate: 0.649752
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 20
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 16.611423
Explore rate: 0.649752
Learning rate: 0.5000


Episode = 59
t = 23
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 16.526975
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 24
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 17.238119
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 25
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 17.238119
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 26
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 17.296356
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0

Episode 59 finished after 26.000000 time steps

Episode = 60
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 37.409307
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 60
t = 1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 41.121270
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 60
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 41.121270



Episode = 62
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 45.020716
Explore rate: 0.605548
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 45.295613
Explore rate: 0.605548
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 8
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 19.256448
Explore rate: 0.605548
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 9
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 19.256448
Explore rate: 0.605548
Learning rate: 0.500000
Streaks: 0

Episode 62 finished after 9.000000 time steps

Episode = 63
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 40.354181
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 63
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 40.538710
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 63
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 40.892866
Explo


Episode = 65
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 47.953130
Explore rate: 0.585027
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.116147
Explore rate: 0.585027
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.116147
Explore rate: 0.585027
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 12
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.116147
Explore rate: 0.585027
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 48.294058
Explore rate: 0.585027
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 14
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 48.294058
Explore rate: 0.585027
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 15
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 48.294058
Explore rate: 0.585027
Learning rate: 0.50000

t = 12
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 45.353620
Explore rate: 0.559091
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 13
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 45.533808
Explore rate: 0.559091
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 14
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 45.716045
Explore rate: 0.559091
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 15
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 45.896346
Explore rate: 0.559091
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 16
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 41.635078
Explore rate: 0.559091
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.018011
Explore rate: 0.559091
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 18
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 45.086454
Explore rate: 0.559091
Learning rate: 0.500000
Streaks: 0



Episode = 71
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 45.354101
Explore rate: 0.546682
Learning rate: 0.500000
Streaks: 0


Episode = 71
t = 21
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 45.314300
Explore rate: 0.546682
Learning rate: 0.500000
Streaks: 0


Episode = 71
t = 22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 45.607629
Explore rate: 0.546682
Learning rate: 0.500000
Streaks: 0


Episode = 71
t = 23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 45.607629
Explore rate: 0.546682
Learning rate: 0.500000
Streaks: 0


Episode = 71
t = 24
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 45.732927
Explore rate: 0.546682
Learning rate: 0.500000
Streaks: 0


Episode = 71
t = 25
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 45.941613
Explore rate: 0.546682
Learning rate: 0.500000
Streaks: 0


Episode = 71
t = 26
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 46.107562
Explore rate: 0.546682
Learning rate: 0.5000


Episode = 74
t = 9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 47.206230
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 10
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 43.705095
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 11
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 47.206230
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 12
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 47.206230
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 13
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 47.470199
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 14
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 47.541826
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 47.899628
Explore rate: 0.528708
Learning rate: 0.50000


Episode = 76
t = 29
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 44.860174
Explore rate: 0.517126
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 45.135873
Explore rate: 0.517126
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 45.410194
Explore rate: 0.517126
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 32
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 46.496858
Explore rate: 0.517126
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 33
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 46.357516
Explore rate: 0.517126
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 34
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 46.695399
Explore rate: 0.517126
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 35
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 46.792981
Explore rate: 0.517126
Learning rate: 0.5000


Episode = 77
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 48.025951
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 44.825759
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.422781
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 44.825759
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 4
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 44.825759
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 5
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 45.101630
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 45.101630
Explore rate: 0.511449
Learning rate: 0.500000
Stre


Episode = 79
t = 1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.398279
Explore rate: 0.500313
Learning rate: 0.500000
Streaks: 0


Episode = 79
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.398279
Explore rate: 0.500313
Learning rate: 0.500000
Streaks: 0


Episode = 79
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 47.859244
Explore rate: 0.500313
Learning rate: 0.500000
Streaks: 0


Episode = 79
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.294105
Explore rate: 0.500313
Learning rate: 0.500000
Streaks: 0


Episode = 79
t = 5
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 48.637542
Explore rate: 0.500313
Learning rate: 0.500000
Streaks: 0


Episode = 79
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 48.637542
Explore rate: 0.500313
Learning rate: 0.500000
Streaks: 0


Episode = 79
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 47.269974
Explore rate: 0.500313
Learning rate: 0.500000
Stre


Episode = 80
t = 44
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 49.241225
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 80
t = 45
Action: 1
State: (0, 0, 4, 1)
Reward: 1.000000
Best Q: 12.206998
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 80
t = 46
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 17.482614
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 80
t = 47
Action: 1
State: (0, 0, 4, 1)
Reward: 1.000000
Best Q: 15.225974
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 80
t = 48
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 16.785420
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 80
t = 49
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 16.785420
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0

Episode 80 finished after 49.000000 time steps

Episode = 81
t = 0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 51.76613


Episode = 83
t = 21
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 24.781792
Explore rate: 0.478862
Learning rate: 0.478862
Streaks: 0


Episode = 83
t = 22
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 25.392135
Explore rate: 0.478862
Learning rate: 0.478862
Streaks: 0


Episode = 83
t = 23
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 25.392135
Explore rate: 0.478862
Learning rate: 0.478862
Streaks: 0

Episode 83 finished after 23.000000 time steps

Episode = 84
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 48.394550
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 84
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 48.570915
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 84
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 54.930962
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 84
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 40.245349
E


Episode = 87
t = 1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.818708
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 87
t = 2
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.818708
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 87
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 52.039582
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 87
t = 4
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 52.259442
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 87
t = 5
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 52.478295
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 87
t = 6
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 27.270250
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 87
t = 7
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 27.270250
Explore rate: 0.458421
Learning rate: 0.458421
Stre


Episode = 88
t = 52
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 27.830514
Explore rate: 0.453457
Learning rate: 0.453457
Streaks: 0


Episode = 88
t = 53
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 27.691860
Explore rate: 0.453457
Learning rate: 0.453457
Streaks: 0

Episode 88 finished after 53.000000 time steps

Episode = 89
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 51.340619
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 50.891152
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 51.479649
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 51.372760
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 51.649821
Ex


Episode = 89
t = 58
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 53.732336
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 59
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 54.027648
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 60
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 54.071007
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 54.819751
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 54.819751
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 55.022407
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 64
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 55.224154
Explore rate: 0.448550
Learning rate: 0.4485


Episode = 89
t = 118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.896248
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 119
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.896248
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 120
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 17.598078
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 121
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 17.598078
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 122
Action: 1
State: (0, 0, 4, 1)
Reward: 1.000000
Best Q: 16.409453
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 123
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 17.643689
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 89
t = 124
Action: 1
State: (0, 0, 4, 1)
Reward: 1.000000
Best Q: 17.332479
Explore rate: 0.448550
Learning rate:


Episode = 91
t = 10
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.388766
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 91
t = 11
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.388766
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 91
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.568643
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 91
t = 13
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.324372
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 91
t = 14
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.324372
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 91
t = 15
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.324372
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 91
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 57.087634
Explore rate: 0.438899
Learning rate: 0.4388


Episode = 92
t = 38
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 57.397115
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 92
t = 39
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 57.397115
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 92
t = 40
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 57.397115
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 92
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 57.626781
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 92
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 57.626781
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 92
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 56.558116
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 92
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 57.849547
Explore rate: 0.434152
Learning rate: 0.4341


Episode = 94
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.452550
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 25
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.452550
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.637545
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 27
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 57.785449
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 58.831305
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 58.831305
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 30
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 59.006195
Explore rate: 0.424812
Learning rate: 0.4248


Episode = 94
t = 84
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 58.809073
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 58.887959
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 86
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 28.926891
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 87
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 28.926891
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 88
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 28.926891
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 94
t = 89
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 29.228818
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0

Episode 94 finished after 89.000000 time steps

Episode = 95
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 59.48800


Episode = 95
t = 54
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.053180
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.549755
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.549755
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 57
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.715532
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.880612
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 60.130494
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 60
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 60.130494
Explore rate: 0.420216
Learning rate: 0.4202


Episode = 95
t = 114
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 61.323692
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 115
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.772192
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.772192
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.932832
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 118
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 61.643827
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 119
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.065311
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 120
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 61.980349
Explore rate: 0.420216
Learning rate:


Episode = 95
t = 174
Action: 1
State: (0, 0, 4, 1)
Reward: 1.000000
Best Q: 18.724014
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 175
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 19.112273
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 95
t = 176
Action: 1
State: (0, 0, 4, 1)
Reward: 1.000000
Best Q: 19.227071
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0

Episode 95 finished after 176.000000 time steps

Episode = 96
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.941306
Explore rate: 0.415669
Learning rate: 0.415669
Streaks: 0


Episode = 96
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 62.756023
Explore rate: 0.415669
Learning rate: 0.415669
Streaks: 0


Episode = 96
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.109113
Explore rate: 0.415669
Learning rate: 0.415669
Streaks: 0


Episode = 96
t = 3
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.1091


Episode = 98
t = 13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 55.411355
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 14
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 55.411355
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 55.411355
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 16
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 55.411355
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 17
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 55.411355
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 18
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 55.592704
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 19
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 55.773314
Explore rate: 0.406714
Learning rate: 0.4067


Episode = 98
t = 73
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.356364
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.356364
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.497265
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 76
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 65.187207
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 77
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.596003
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.596003
Explore rate: 0.406714
Learning rate: 0.406714
Streaks: 0


Episode = 98
t = 79
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.735929
Explore rate: 0.406714
Learning rate: 0.4067


Episode = 99
t = 44
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.575739
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 99
t = 45
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.575739
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 99
t = 46
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.714230
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 99
t = 47
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.714230
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 99
t = 48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.986710
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 99
t = 49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.986710
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 99
t = 50
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.123547
Explore rate: 0.402305
Learning rate: 0.4023


Episode = 100
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.660645
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 100
t = 7
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.797295
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 100
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.797295
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 100
t = 9
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 29.816848
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 100
t = 10
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 29.816848
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0

Episode 100 finished after 10.000000 time steps

Episode = 101
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.731515
Explore rate: 0.393619
Learning rate: 0.393619
Streaks: 0


Episode = 101
t = 1
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 63.72


Episode = 101
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.297519
Explore rate: 0.393619
Learning rate: 0.393619
Streaks: 0


Episode = 101
t = 56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.430178
Explore rate: 0.393619
Learning rate: 0.393619
Streaks: 0


Episode = 101
t = 57
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.562315
Explore rate: 0.393619
Learning rate: 0.393619
Streaks: 0


Episode = 101
t = 58
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.562315
Explore rate: 0.393619
Learning rate: 0.393619
Streaks: 0


Episode = 101
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.562315
Explore rate: 0.393619
Learning rate: 0.393619
Streaks: 0


Episode = 101
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.693932
Explore rate: 0.393619
Learning rate: 0.393619
Streaks: 0


Episode = 101
t = 61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.169854
Explore rate: 0.393619
Learning rate:


Episode = 104
t = 4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.340342
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 104
t = 5
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.464745
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 104
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.464745
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 104
t = 7
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.464745
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 104
t = 8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 67.198659
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 104
t = 9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 67.198659
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 104
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 67.323601
Explore rate: 0.380907
Learning rate: 0.380


Episode = 106
t = 15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.702596
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 106
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.826674
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 106
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 67.341468
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 106
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.329988
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 106
t = 19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.329988
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 106
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.448001
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 106
t = 21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 67.778586
Explore rate: 0.372634
Learning rate:


Episode = 109
t = 4
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.120618
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 5
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.120618
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 6
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.289625
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 7
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.458022
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.076066
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.076066
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.201971
Explore rate: 0.360514
Learning rate: 0.360


Episode = 109
t = 64
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 22.235128
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 65
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 22.235128
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 109
t = 66
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 22.235128
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0

Episode 109 finished after 66.000000 time steps

Episode = 110
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.989739
Explore rate: 0.356547
Learning rate: 0.356547
Streaks: 0


Episode = 110
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 66.496242
Explore rate: 0.356547
Learning rate: 0.356547
Streaks: 0


Episode = 110
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.650132
Explore rate: 0.356547
Learning rate: 0.356547
Streaks: 0


Episode = 110
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 60.

Episode = 111
t = 38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 64.599472
Explore rate: 0.352617
Learning rate: 0.352617
Streaks: 0


Episode = 111
t = 39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 64.724300
Explore rate: 0.352617
Learning rate: 0.352617
Streaks: 0


Episode = 111
t = 40
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.908905
Explore rate: 0.352617
Learning rate: 0.352617
Streaks: 0


Episode = 111
t = 41
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.908905
Explore rate: 0.352617
Learning rate: 0.352617
Streaks: 0


Episode = 111
t = 42
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.908905
Explore rate: 0.352617
Learning rate: 0.352617
Streaks: 0


Episode = 111
t = 43
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.908905
Explore rate: 0.352617
Learning rate: 0.352617
Streaks: 0


Episode = 111
t = 44
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.807032
Explore rate: 0.352617
Learning rate: 


Episode = 113
t = 1
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.967768
Explore rate: 0.344862
Learning rate: 0.344862
Streaks: 0


Episode = 113
t = 2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.967768
Explore rate: 0.344862
Learning rate: 0.344862
Streaks: 0


Episode = 113
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 59.945691
Explore rate: 0.344862
Learning rate: 0.344862
Streaks: 0


Episode = 113
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.836117
Explore rate: 0.344862
Learning rate: 0.344862
Streaks: 0


Episode = 113
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 60.387826
Explore rate: 0.344862
Learning rate: 0.344862
Streaks: 0


Episode = 113
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.818126
Explore rate: 0.344862
Learning rate: 0.344862
Streaks: 0


Episode = 113
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 60.671343
Explore rate: 0.344862
Learning rate: 0.3448


Episode = 114
t = 41
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 59.312473
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.431389
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 43
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 60.595693
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.431389
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.431389
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.559511
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 65.298895
Explore rate: 0.341035
Learning rate:


Episode = 114
t = 101
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.876642
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 102
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 67.004970
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 103
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 22.512397
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 104
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 22.512397
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 105
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 22.776657
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 114
t = 106
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 23.040016
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0

Episode 114 finished after 106.000000 time steps

Episode = 115
t = 0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
B


Episode = 115
t = 54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 65.479028
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 65.595447
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 56
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 67.237033
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 57
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.336447
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 58
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 67.046846
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.980004
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.980004
Explore rate: 0.337242
Learning rate:


Episode = 115
t = 114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.149396
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 115
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.263555
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 116
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.705157
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 117
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.600170
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 118
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.782389
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 119
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.782389
Explore rate: 0.337242
Learning rate: 0.337242
Streaks: 0


Episode = 115
t = 120
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 64.574766
Explore rate: 0.337242
Learnin

Episode = 117
t = 29
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.933589
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 117
t = 30
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.074520
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 117
t = 31
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.074520
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 117
t = 32
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.074520
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 117
t = 33
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.189688
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 117
t = 34
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.189688
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 117
t = 35
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.304476
Explore rate: 0.329754
Learning rate: 


Episode = 118
t = 39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 64.140424
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 64.257347
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 41
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.975036
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 42
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.329694
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.551604
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 44
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.551604
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 45
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 66.551604
Explore rate: 0.326058
Learning rate:


Episode = 118
t = 99
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 67.025365
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 100
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 65.935799
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 101
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 65.935799
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.059296
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.059296
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 104
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.166702
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 105
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 67.506419
Explore rate: 0.326058
Learning


Episode = 118
t = 159
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.159750
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 160
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.153777
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.159750
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.159750
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.263567
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 164
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 67.969992
Explore rate: 0.326058
Learning rate: 0.326058
Streaks: 0


Episode = 118
t = 165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 68.365165
Explore rate: 0.326058
Learnin


Episode = 119
t = 19
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 68.461573
Explore rate: 0.322393
Learning rate: 0.322393
Streaks: 1


Episode = 119
t = 20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 69.276509
Explore rate: 0.322393
Learning rate: 0.322393
Streaks: 1


Episode = 119
t = 21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 69.276509
Explore rate: 0.322393
Learning rate: 0.322393
Streaks: 1


Episode = 119
t = 22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 69.375560
Explore rate: 0.322393
Learning rate: 0.322393
Streaks: 1


Episode = 119
t = 23
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 69.009471
Explore rate: 0.322393
Learning rate: 0.322393
Streaks: 1


Episode = 119
t = 24
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 69.424347
Explore rate: 0.322393
Learning rate: 0.322393
Streaks: 1


Episode = 119
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 69.241798
Explore rate: 0.322393
Learning rate:


Episode = 120
t = 1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 69.607265
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 120
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 69.704144
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 120
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 69.389301
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 120
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 69.767148
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 120
t = 5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 69.606113
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 120
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 70.127285
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 120
t = 7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 61.583521
Explore rate: 0.318759
Learning rate: 0.3187


Episode = 121
t = 46
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 70.641237
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 121
t = 47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.405346
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 121
t = 48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.405346
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 121
t = 49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.498615
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 121
t = 50
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 70.282504
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 121
t = 51
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.587836
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 121
t = 52
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 70.471425
Explore rate: 0.315155
Learning rate:


Episode = 122
t = 8
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.746773
Explore rate: 0.311580
Learning rate: 0.311580
Streaks: 0


Episode = 122
t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.746773
Explore rate: 0.311580
Learning rate: 0.311580
Streaks: 0


Episode = 122
t = 10
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.875310
Explore rate: 0.311580
Learning rate: 0.311580
Streaks: 0


Episode = 122
t = 11
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 59.003446
Explore rate: 0.311580
Learning rate: 0.311580
Streaks: 0


Episode = 122
t = 12
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 24.799492
Explore rate: 0.311580
Learning rate: 0.311580
Streaks: 0


Episode = 122
t = 13
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 24.799492
Explore rate: 0.311580
Learning rate: 0.311580
Streaks: 0

Episode 122 finished after 13.000000 time steps

Episode = 123
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71


Episode = 124
t = 21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.466101
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.552992
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 23
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.639618
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.639618
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 71.359066
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 71.652258
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 71.652258
Explore rate: 0.304518
Learning rate:


Episode = 124
t = 81
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.006376
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 82
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 71.969095
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 83
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 71.969095
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 84
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 71.969095
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.753106
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 86
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 64.851298
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 87
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.753106
Explore rate: 0.304518
Learning rate:


Episode = 124
t = 141
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 71.584647
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 142
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.055474
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 143
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 71.813119
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.437073
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 145
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.437073
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.521007
Explore rate: 0.304518
Learning rate: 0.304518
Streaks: 0


Episode = 124
t = 147
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 68.622544
Explore rate: 0.304518
Learnin


Episode = 125
t = 15
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.565849
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 125
t = 16
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 72.622411
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 125
t = 17
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.471759
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 125
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.471759
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 125
t = 19
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.554627
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 125
t = 20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.554627
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 125
t = 21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.637246
Explore rate: 0.301030
Learning rate:


Episode = 126
t = 41
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.870221
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 126
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.870221
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 126
t = 43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.950951
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 126
t = 44
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.362398
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 126
t = 45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.859152
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 126
t = 46
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 69.078827
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 126
t = 47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.859152
Explore rate: 0.297569
Learning rate:


Episode = 127
t = 22
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 25.246373
Explore rate: 0.294136
Learning rate: 0.294136
Streaks: 0


Episode = 127
t = 23
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 25.466251
Explore rate: 0.294136
Learning rate: 0.294136
Streaks: 0


Episode = 127
t = 24
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 25.466251
Explore rate: 0.294136
Learning rate: 0.294136
Streaks: 0

Episode 127 finished after 24.000000 time steps

Episode = 128
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.936457
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 72.724380
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 73.009905
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 72.


Episode = 128
t = 57
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 72.483054
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 73.854346
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 73.854346
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 73.930359
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 73.635686
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 62
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 73.975095
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 63
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 73.975095
Explore rate: 0.290730
Learning rate:


Episode = 128
t = 118
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 73.937726
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 119
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.145090
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.145090
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 121
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 74.073181
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 122
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.252876
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 123
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.252876
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 128
t = 124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.252876
Explore rate: 0.290730
Learnin


Episode = 131
t = 7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 74.573198
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 0


Episode = 131
t = 8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.680106
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 0


Episode = 131
t = 9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 73.564362
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 0


Episode = 131
t = 10
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.680106
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 0


Episode = 131
t = 11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.680106
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 0


Episode = 131
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.751171
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 0


Episode = 131
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 74.674269
Explore rate: 0.280669
Learning rate: 0.


Episode = 132
t = 36
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 74.690618
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 37
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.954940
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 38
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 74.284213
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.712083
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.712083
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.782223
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 42
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 74.579010
Explore rate: 0.277366
Learning rate:


Episode = 132
t = 96
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.087579
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 97
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 74.202270
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.821017
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.821017
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 100
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.890855
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 101
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 74.120807
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.190945
Explore rate: 0.277366
Learning ra


Episode = 132
t = 156
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 75.088565
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 157
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.075459
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 158
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 74.206655
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.999441
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.999441
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 161
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 75.068784
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 0


Episode = 132
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 75.068784
Explore rate: 0.277366
Learnin


Episode = 133
t = 16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.756880
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.823327
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.889593
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 19
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 75.427288
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 20
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.878203
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 75.616994
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 22
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.873439
Explore rate: 0.274088
Learning rate:


Episode = 133
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.808516
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 77
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.808516
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.874823
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 79
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 75.917415
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 80
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.000505
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 81
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 76.005969
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 82
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.067767
Explore rate: 0.274088
Learning rate:


Episode = 133
t = 136
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.739093
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 137
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 76.577639
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 138
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.759038
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 139
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 76.055205
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.637450
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.637450
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.701484
Explore rate: 0.274088
Learnin


Episode = 133
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.241843
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 197
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.154799
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.048550
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1


Episode = 133
t = 199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.048550
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 1

Episode 133 finished after 199.000000 time steps

Episode = 134
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.325882
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.188584
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best 


Episode = 134
t = 56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.326031
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 57
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.326031
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 58
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.387440
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 59
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.387440
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 60
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.448683
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 61
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.080631
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 62
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.455610
Explore rate: 0.270835
Learning rate:


Episode = 134
t = 116
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 78.052657
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 117
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.606428
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 118
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.606428
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 119
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.606428
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 120
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.667077
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 121
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.172973
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.172973
Explore rate: 0.270835
Learnin


Episode = 134
t = 176
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.954296
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.954296
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 78.898399
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 179
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.037870
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 180
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 78.992946
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 181
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.082597
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 2


Episode = 134
t = 182
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.073878
Explore rate: 0.270835
Learnin


Episode = 135
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.241587
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 3


Episode = 135
t = 37
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.217946
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 3


Episode = 135
t = 38
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.217946
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 3


Episode = 135
t = 39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.290875
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 3


Episode = 135
t = 40
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.333612
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 3


Episode = 135
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.357616
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 3


Episode = 135
t = 42
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.395276
Explore rate: 0.267606
Learning rate:

Episode = 137
t = 29
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.457854
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 0


Episode = 137
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.457854
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 0


Episode = 137
t = 31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.511515
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 0


Episode = 137
t = 32
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.304822
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 0


Episode = 137
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.396223
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 0


Episode = 137
t = 34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.396223
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 0


Episode = 137
t = 35
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.450045
Explore rate: 0.261219
Learning rate: 


Episode = 139
t = 8
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.822984
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 71.820461
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.822984
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.822984
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.876970
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.197895
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 14
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.051932
Explore rate: 0.254925
Learning rate: 0


Episode = 139
t = 68
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.429756
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 69
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.551383
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 70
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.512890
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 71
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.593797
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 72
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.585536
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.665068
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 74
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 76.559328
Explore rate: 0.254925
Learning rate:


Episode = 139
t = 128
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.307778
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 129
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.307778
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.995870
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 131
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.834761
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.995870
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 133
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.995870
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 134
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.049415
Explore rate: 0.254925
Learnin


Episode = 139
t = 189
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.953900
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 190
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.953900
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.005002
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 192
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.618655
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.271222
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.271222
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 0


Episode = 139
t = 195
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.324064
Explore rate: 0.254925
Learnin


Episode = 140
t = 50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.996840
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 51
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.047211
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 52
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.274779
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.970133
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.970133
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.020570
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 56
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.812635
Explore rate: 0.251812
Learning rate:


Episode = 140
t = 111
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 80.242798
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 112
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.545847
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 113
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.545847
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.545847
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 115
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 80.368097
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 116
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.587175
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 117
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 80.472147
Explore rate: 0.251812
Learnin


Episode = 140
t = 172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.991883
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 173
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 80.774171
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.991883
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.991883
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.039748
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 177
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 80.614972
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 1


Episode = 140
t = 178
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.017319
Explore rate: 0.251812
Learnin


Episode = 142
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 72.640349
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 0


Episode = 142
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 72.707559
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 0


Episode = 142
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 72.774603
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 0


Episode = 142
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 72.841483
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 0


Episode = 142
t = 11
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 40.729373
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 0


Episode = 142
t = 12
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 40.729373
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 0

Episode 142 finished after 12.000000 time steps

Episode = 143
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.


Episode = 143
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.454360
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 0


Episode = 143
t = 56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.454360
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 0


Episode = 143
t = 57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.499352
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 0


Episode = 143
t = 58
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 81.283411
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 0


Episode = 143
t = 59
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.526366
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 0


Episode = 143
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.526366
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 0


Episode = 143
t = 61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 81.387171
Explore rate: 0.242604
Learning rate:


Episode = 145
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.941973
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 0


Episode = 145
t = 19
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.795575
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 0


Episode = 145
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.955138
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 0


Episode = 145
t = 21
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.880744
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 0


Episode = 145
t = 22
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.880744
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 0


Episode = 145
t = 23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.985135
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 0


Episode = 145
t = 24
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.989126
Explore rate: 0.236572
Learning rate:


Episode = 147
t = 24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.012010
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.998831
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 26
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.484945
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 27
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.123226
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 28
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.123226
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.171372
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 30
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.219408
Explore rate: 0.230623
Learning rate:

Episode = 147
t = 84
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.008327
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 85
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.831918
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 86
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.831918
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 87
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.831918
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.831918
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.880737
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 0


Episode = 147
t = 90
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 79.016462
Explore rate: 0.230623
Learning rate: 


Episode = 149
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 74.767356
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 0


Episode = 149
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 74.824068
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 0


Episode = 149
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 74.880651
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 0


Episode = 149
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 74.937108
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 0


Episode = 149
t = 13
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 41.856578
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 0


Episode = 149
t = 14
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 41.856578
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 0

Episode 149 finished after 14.000000 time steps

Episode = 150
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 7


Episode = 152
t = 7
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.165513
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 0


Episode = 152
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.676484
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 0


Episode = 152
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.676484
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 0


Episode = 152
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.746334
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 0


Episode = 152
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.816033
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 0


Episode = 152
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.885581
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 0


Episode = 152
t = 13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.954980
Explore rate: 0.216096
Learning rate: 0.


Episode = 153
t = 51
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.583372
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 0


Episode = 153
t = 52
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.548939
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 0


Episode = 153
t = 53
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.548939
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 0


Episode = 153
t = 54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.548939
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 0


Episode = 153
t = 55
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.412789
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 0


Episode = 153
t = 56
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.820665
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 0


Episode = 153
t = 57
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.335949
Explore rate: 0.213249
Learning rate:

t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.379327
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.039038
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.821017
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.821017
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.869790
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 15
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.025246
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.393621
Explore rate: 0.210419
Learning rate: 0.210419
Strea


Episode = 154
t = 71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.530995
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 72
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.578274
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 73
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.135254
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.199802
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.199802
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.247778
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.049493
Explore rate: 0.210419
Learning rate:

t = 131
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.598060
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.645198
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 133
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.436795
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 134
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.685964
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 135
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.536178
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 136
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.701714
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 137
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.617930
Explore rate: 0.210419
Learning rate: 0.21041


Episode = 154
t = 192
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 77.040593
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 193
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 77.040593
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 194
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 77.088904
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 195
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 77.088904
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 196
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 77.137113
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 197
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 27.473308
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 0


Episode = 154
t = 198
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 27.473308
Explore rate: 0.210419
Learnin


Episode = 156
t = 36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.268220
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 37
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.268220
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.312730
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 39
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.665617
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.249045
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.249045
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.293594
Explore rate: 0.204815
Learning rate:


Episode = 156
t = 96
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.784178
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 97
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.829679
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.640405
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 99
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.872817
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 100
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 41.565437
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 101
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 41.565437
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 0


Episode = 156
t = 102
Action: 1
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 41.565437
Explore rate: 0.204815
Learning ra


Episode = 158
t = 40
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 78.521315
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.615815
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.615815
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.662415
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 44
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 78.012484
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.148613
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.148613
Explore rate: 0.199283
Learning rate:


Episode = 158
t = 101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.229887
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 102
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 78.125575
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 103
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.287430
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 104
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 78.201099
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.320674
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.320674
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.365870
Explore rate: 0.199283
Learnin


Episode = 158
t = 161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.734837
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 162
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 78.118727
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 163
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.379370
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.379370
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.422456
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 166
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 78.267534
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 0


Episode = 158
t = 167
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 75.771560
Explore rate: 0.199283
Learnin


Episode = 159
t = 21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.253797
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 22
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.296538
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.296538
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.339194
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 78.125685
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.784935
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.784935
Explore rate: 0.196543
Learning rate:


Episode = 159
t = 82
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 78.613167
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 83
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.734788
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 84
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 78.311757
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.646698
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.646698
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 87
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.688666
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 88
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 78.918465
Explore rate: 0.196543
Learning rate:


Episode = 159
t = 142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.079695
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 143
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 78.680725
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.079695
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 145
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.079695
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.120813
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 147
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.161849
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 1


Episode = 159
t = 148
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.088503
Explore rate: 0.196543
Learnin


Episode = 160
t = 2
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 78.387373
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 3
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 78.387373
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.459612
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 78.668777
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.459612
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.459612
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.499423
Explore rate: 0.193820
Learning rate: 0.1938


Episode = 160
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.748876
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 63
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.788126
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 64
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 79.262166
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.799952
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.799952
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.839104
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 68
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.761206
Explore rate: 0.193820
Learning rate:


Episode = 160
t = 123
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 80.218116
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 124
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.380757
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 80.287665
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.068366
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.068366
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.106998
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 129
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 79.926684
Explore rate: 0.193820
Learnin


Episode = 160
t = 184
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.502140
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 185
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.539931
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 186
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 80.512778
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 187
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 80.512778
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 188
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 80.512778
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 189
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.602845
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 2


Episode = 160
t = 190
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.842691
Explore rate: 0.193820
Learnin

t = 45
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.886863
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 46
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 80.754881
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 47
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.898420
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 48
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 80.818819
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 49
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 80.818819
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 50
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 80.441492
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 51
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.776576
Explore rate: 0.191114
Learning rate: 0.191114
Strea

State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 81.077492
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.158765
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.158765
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.194773
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 110
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 80.770871
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.288220
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.288220
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Epi


Episode = 161
t = 168
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.431880
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.431880
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.467367
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 171
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 81.275313
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 172
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.495098
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 173
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.669414
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 3


Episode = 161
t = 174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.669414
Explore rate: 0.191114
Learnin

t = 28
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 81.521337
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 29
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 81.521337
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.904561
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.904561
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.938658
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 33
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 81.744062
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 34
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.964009
Explore rate: 0.188425
Learning rate: 0.188425
Strea


Episode = 162
t = 89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.137730
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 90
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 82.057663
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 91
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.183766
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 92
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 82.114994
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 93
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.204508
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.204508
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 95
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 82.165392
Explore rate: 0.188425
Learning rate:


Episode = 162
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.514850
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.514850
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.547797
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 153
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 82.388408
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 154
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.577637
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 155
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.577637
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 4


Episode = 162
t = 156
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 82.456892
Explore rate: 0.188425
Learnin


Episode = 163
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.860474
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 11
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.892311
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.892311
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 13
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.924089
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.924089
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 15
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 82.768580
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.805982
Explore rate: 0.185752
Learning rate:


Episode = 163
t = 70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.046513
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 71
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 82.983267
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 72
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 82.983267
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 73
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 82.820034
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.261193
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.261193
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.292286
Explore rate: 0.185752
Learning rate:


Episode = 163
t = 131
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 83.209775
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.474377
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.474377
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 134
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.505073
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.505073
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.535713
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.566296
Explore rate: 0.185752
Learnin

Episode = 163
t = 191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.768303
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 192
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 83.476754
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.768303
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.768303
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 195
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.798453
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 196
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 83.709911
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 5


Episode = 163
t = 197
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.836770
Explore rate: 0.185752
Learning


Episode = 164
t = 52
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 83.471235
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.971677
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.971677
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.001024
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 56
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.030318
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 57
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.030318
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.030318
Explore rate: 0.183096
Learning rate:


Episode = 164
t = 112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.258661
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 113
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.199114
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 114
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.300233
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 115
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.246375
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 116
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.319216
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 117
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.288423
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.236393
Explore rate: 0.183096
Learnin


Episode = 164
t = 173
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.560945
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 174
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.498624
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.560297
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.560297
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 177
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 84.535570
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.601009
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 6


Episode = 164
t = 179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.601009
Explore rate: 0.183096
Learnin


Episode = 165
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.741623
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.769158
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.760791
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 36
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.817673
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 37
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 84.265792
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.793517
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.793517
Explore rate: 0.180456
Learning rate:


Episode = 165
t = 93
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.958970
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 94
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.022685
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 95
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 84.688922
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 96
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.070356
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.070356
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.097297
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 99
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 85.025801
Explore rate: 0.180456
Learning rate:



Episode = 165
t = 154
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 85.387504
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 155
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.428171
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 156
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 84.863838
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.196015
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.196015
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.222730
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 7


Episode = 165
t = 160
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.094411
Explore rate: 0.180456
Learni

Episode = 166
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.459599
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 15
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 85.282261
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.459599
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 17
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 85.339655
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.459599
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.459599
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 20
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.485457
Explore rate: 0.177832
Learning rate: 


Episode = 166
t = 75
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.695027
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 76
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.762244
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.762244
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 78
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.732299
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 79
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.732299
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 80
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 85.383129
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.791119
Explore rate: 0.177832
Learning rate:


Episode = 166
t = 136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.941183
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.941183
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.966184
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.885676
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 140
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.997486
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 141
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 85.707481
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 142
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 85.707481
Explore rate: 0.177832
Learnin

Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 197
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 86.005097
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.254039
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8


Episode = 166
t = 199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.254039
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 8

Episode 166 finished after 199.000000 time steps

Episode = 167
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.226441
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 86.145553
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 2
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 86.145553
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t


Episode = 167
t = 57
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 86.193991
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 58
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 86.193991
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.452036
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.452036
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 61
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.475775
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 62
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.430114
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 63
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.430114
Explore rate: 0.175224
Learning rate:


Episode = 167
t = 118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.622448
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 119
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.483650
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 120
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.641144
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 121
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.534654
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.692293
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.692293
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.715611
Explore rate: 0.175224
Learnin


Episode = 167
t = 178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.930965
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 179
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.930965
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.953865
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 181
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.874412
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 182
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.981796
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.981796
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 9


Episode = 167
t = 184
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.916039
Explore rate: 0.175224
Learnin