In [3]:
import gym
import numpy as np
import math
import random

In [4]:
env = gym.make('CartPole-v0')

In [5]:
env.action_space.n

2

In [6]:
env.observation_space

Box(4,)

In [7]:
env.observation_space.low

array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38],
      dtype=float32)

In [8]:
env.observation_space.high

array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38],
      dtype=float32)

In [9]:
NUM_BUCKETS = (1, 1, 6, 3)

In [10]:
NUM_ACTIONS = env.action_space.n

In [11]:
STATE_BOUNDS = list(zip(env.observation_space.low, env.observation_space.high))

In [12]:
STATE_BOUNDS[1] = [-0.5, 0.5]

In [13]:
STATE_BOUNDS[3] = [-math.radians(50), math.radians(50)]

In [14]:
STATE_BOUNDS

[(-4.8, 4.8),
 [-0.5, 0.5],
 (-0.41887903, 0.41887903),
 [-0.8726646259971648, 0.8726646259971648]]

In [15]:
q_table = np.zeros(NUM_BUCKETS + (NUM_ACTIONS,))

In [16]:
q_table.shape

(1, 1, 6, 3, 2)

In [17]:
q_table.size

36

In [18]:
EXPLORE_RATE_MIN = 0.01

In [19]:
LEARNING_RATE_MIN = 0.1

In [20]:
def get_explore_rate(t):
    return max(EXPLORE_RATE_MIN, min(1, 1.0-math.log10((t+1)/25)))

In [21]:
def get_learning_rate(t):
    return max(LEARNING_RATE_MIN, min(0.5, 1.0-math.log10((t+1)/25)))

In [22]:
def select_action(state, explore_rate):
    if random.random() < explore_rate:
        action = env.action_space.sample()
    else:
        action = np.argmax(q_table[state])
    return action

In [23]:
def state_to_bucket(state):
    
    bucket_indices = []
    
    for i in range(len(state)):
        if state[i] <= STATE_BOUNDS[i][0]:
            bucket_index = 0
            
        elif state[i] >= STATE_BOUNDS[i][1]:
            bucket_index = NUM_BUCKETS[i] - 1
        
        else:
            bound_width = STATE_BOUNDS[i][1] - STATE_BOUNDS[i][0]
            
            offset = (NUM_BUCKETS[i] - 1) * STATE_BOUNDS[i][0] / bound_width
            scaling = (NUM_BUCKETS[i] - 1) / bound_width
            
            bucket_index = int(round(scaling * state[i] - offset))
        
        bucket_indices.append(bucket_index)
    
    return tuple(bucket_indices)

In [24]:
def simulate():
    
    learning_rate = get_learning_rate(0)
    explore_rate = get_explore_rate(0)
    
    discount_factor = 0.99
    num_streaks = 0
    
    for episode in range(1000):
        
        observ = env.reset()
        
        state_0 = state_to_bucket(observ)
        
        for t in range(250):
            
            env.render()
            
            action = select_action(state_0, explore_rate)
            
            observ, reward, done, _ = env.step(action)
            
            state = state_to_bucket(observ)
            
            best_q = np.amax(q_table[state])
            
            q_table[state_0 + (action,)] += learning_rate * (reward + discount_factor*(best_q) - q_table[state_0 + (action,)])
            
            
            state_0 = state
            
            print("\nEpisode = %d" % episode)
            print("t = %d" % t)
            print("Action: %d" % action)
            print("State: %s" %str(state))
            print("Reward: %f" % reward)
            print("Best Q: %f" % best_q)
            print("Explore rate: %f" % explore_rate)
            print("Learning rate: %f" % learning_rate)
            print("Streaks: %d" %num_streaks)
            
            print("")
            
            if done:
                print("Episode %d finished after %f time steps" % (episode, t))
                
                if (t >= 199):
                    num_streaks += 1
                else:
                    num_streaks = 0
                break
            
            if num_streaks > 120:
                break
            
            explore_rate = get_explore_rate(episode)
            learning_rate = get_learning_rate(episode)

In [25]:
simulate()


Episode = 0
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 0.000000
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 0.500000
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 0.747500
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 0.000000
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 1.121881
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 1.055331
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 0
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 1.583330
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episo

Streaks: 0


Episode = 3
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 3.859592
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 7
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 4.340294
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 4.340294
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 9
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 4.818592
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 5.018362
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 5.393385
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 3
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 5.866418
Explore rate: 1.000000
Learning rate: 0.500000
Str

State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 2.830703
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 6
t = 22
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 2.830703
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0

Episode 6 finished after 22.000000 time steps

Episode = 7
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 4.531920
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 1.563591
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 4.531920
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 3.525096
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 7
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 8.492606
Explore rate: 1.000000
Learning rate: 0.500000



Episode = 9
t = 5
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 12.308912
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 12.308912
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 12.747368
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 8
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 3.919017
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 9
t = 9
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 3.919017
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0

Episode 9 finished after 9.000000 time steps

Episode = 10
t = 0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 9.682785
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 10
t = 1
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 10.136436
Explore rate: 



Episode = 13
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 12.535417
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 13
t = 4
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 12.535417
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 13
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 12.802933
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 13
t = 6
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 3.684177
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 13
t = 7
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 3.684177
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 13
t = 8
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 4.045294
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0

Episode 13 finished after 8.000000 time steps

Episode = 14
t = 0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 13.327278
Explore

Reward: 1.000000
Best Q: 14.533838
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 15
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 14.533838
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 15
t = 11
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 14.961169
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 15
t = 12
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 4.344509
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 15
t = 13
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 4.344509
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0

Episode 15 finished after 13.000000 time steps

Episode = 16
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 15.292957
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 16
t = 1
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 15.716492
Explore rate: 1.000000
Learning rate: 0.500000
Strea


Episode = 18
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 15.475808
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 18
t = 13
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 4.822786
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 18
t = 14
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 4.822786
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 18
t = 15
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 5.298672
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 18
t = 16
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 5.298672
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0

Episode 18 finished after 16.000000 time steps

Episode = 19
t = 0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 15.718702
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 19
t = 1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 16.140108
Exp


Episode = 21
t = 10
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 6.567291
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 21
t = 11
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 6.567291
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0

Episode 21 finished after 11.000000 time steps

Episode = 22
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 20.205187
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22
t = 1
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 20.299709
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22
t = 2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 20.650949
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22
t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 16.656840
Explore rate: 1.000000
Learning rate: 0.500000
Streaks: 0


Episode = 22
t = 4
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 16.656840
Expl

Reward: 1.000000
Best Q: 20.098249
Explore rate: 0.982967
Learning rate: 0.500000
Streaks: 0


Episode = 25
t = 7
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 17.155502
Explore rate: 0.982967
Learning rate: 0.500000
Streaks: 0


Episode = 25
t = 8
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 20.098249
Explore rate: 0.982967
Learning rate: 0.500000
Streaks: 0


Episode = 25
t = 9
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 20.098249
Explore rate: 0.982967
Learning rate: 0.500000
Streaks: 0


Episode = 25
t = 10
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 18.604118
Explore rate: 0.982967
Learning rate: 0.500000
Streaks: 0


Episode = 25
t = 11
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 18.604118
Explore rate: 0.982967
Learning rate: 0.500000
Streaks: 0


Episode = 25
t = 12
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 18.604118
Explore rate: 0.982967
Learning rate: 0.500000
Streaks: 0


Episode = 25
t = 13
Action: 1
State: (


Episode = 27
t = 27
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 22.421743
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 27
t = 28
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 22.421743
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 27
t = 29
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 22.421743
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 27
t = 30
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 22.809634
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 27
t = 31
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 22.809634
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 27
t = 32
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 22.997548
Explore rate: 0.950782
Learning rate: 0.500000
Streaks: 0


Episode = 27
t = 33
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 8.879867
Explore rate: 0.950782
Learning rate: 0.50000


Episode 28 finished after 52.000000 time steps

Episode = 29
t = 0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 27.179808
Explore rate: 0.935542
Learning rate: 0.500000
Streaks: 0


Episode = 29
t = 1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 27.543909
Explore rate: 0.920819
Learning rate: 0.500000
Streaks: 0


Episode = 29
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 27.543909
Explore rate: 0.920819
Learning rate: 0.500000
Streaks: 0


Episode = 29
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 25.312266
Explore rate: 0.920819
Learning rate: 0.500000
Streaks: 0


Episode = 29
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 27.543909
Explore rate: 0.920819
Learning rate: 0.500000
Streaks: 0


Episode = 29
t = 5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 26.790368
Explore rate: 0.920819
Learning rate: 0.500000
Streaks: 0


Episode = 29
t = 6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 23.353792
Exp

Explore rate: 0.892790
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 28.280603
Explore rate: 0.879426
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 28.280603
Explore rate: 0.879426
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 3
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 28.285820
Explore rate: 0.879426
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 28.641783
Explore rate: 0.879426
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 28.301262
Explore rate: 0.879426
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 28.919421
Explore rate: 0.879426
Learning rate: 0.500000
Streaks: 0


Episode = 32
t = 7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 28


Episode = 35
t = 0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 31.072765
Explore rate: 0.853872
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 28.665822
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 31.072765
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 31.072765
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 31.417401
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 28.665822
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 28.665822
Explore rate: 0.841638
Learning rate: 0.500000
Stre


Episode = 35
t = 57
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 26.431260
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 26.912624
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 59
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 26.912624
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 60
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 27.037379
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 61
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 27.037379
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 62
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 11.135204
Explore rate: 0.841638
Learning rate: 0.500000
Streaks: 0


Episode = 35
t = 63
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 11.135204
Explore rate: 0.841638
Learning rate: 0.5000


Episode = 38
t = 11
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 20.151524
Explore rate: 0.806875
Learning rate: 0.500000
Streaks: 0


Episode = 38
t = 12
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 20.151524
Explore rate: 0.806875
Learning rate: 0.500000
Streaks: 0


Episode = 38
t = 13
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 20.550766
Explore rate: 0.806875
Learning rate: 0.500000
Streaks: 0


Episode = 38
t = 14
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 20.948013
Explore rate: 0.806875
Learning rate: 0.500000
Streaks: 0


Episode = 38
t = 15
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 21.343273
Explore rate: 0.806875
Learning rate: 0.500000
Streaks: 0


Episode = 38
t = 16
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 21.736556
Explore rate: 0.806875
Learning rate: 0.500000
Streaks: 0


Episode = 38
t = 17
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 21.736556
Explore rate: 0.806875
Learning rate: 0.5000


Episode = 41
t = 17
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 12.899214
Explore rate: 0.774691
Learning rate: 0.500000
Streaks: 0

Episode 41 finished after 17.000000 time steps

Episode = 42
t = 0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 30.867668
Explore rate: 0.774691
Learning rate: 0.500000
Streaks: 0


Episode = 42
t = 1
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 30.876596
Explore rate: 0.764472
Learning rate: 0.500000
Streaks: 0


Episode = 42
t = 2
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 30.876596
Explore rate: 0.764472
Learning rate: 0.500000
Streaks: 0


Episode = 42
t = 3
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 30.876596
Explore rate: 0.764472
Learning rate: 0.500000
Streaks: 0


Episode = 42
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 31.390580
Explore rate: 0.764472
Learning rate: 0.500000
Streaks: 0


Episode = 42
t = 5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 31.649443
Exp


Episode = 44
t = 26
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 33.148248
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0


Episode = 44
t = 27
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 9.882985
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0


Episode = 44
t = 28
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 9.882985
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0


Episode = 44
t = 29
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 10.089082
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0

Episode 44 finished after 29.000000 time steps

Episode = 45
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 35.238706
Explore rate: 0.744727
Learning rate: 0.500000
Streaks: 0


Episode = 45
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 34.112978
Explore rate: 0.735182
Learning rate: 0.500000
Streaks: 0


Episode = 45
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 35.238706
Ex

Learning rate: 0.500000
Streaks: 0


Episode = 47
t = 11
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 14.199204
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0


Episode = 47
t = 12
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 14.199204
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0

Episode 47 finished after 12.000000 time steps

Episode = 48
t = 0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 36.940144
Explore rate: 0.716699
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 30.995511
Explore rate: 0.707744
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 36.940144
Explore rate: 0.707744
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 36.940144
Explore rate: 0.707744
Learning rate: 0.500000
Streaks: 0


Episode = 48
t = 4
Action: 1
State: (0, 0, 2, 1)
R

t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 23.735718
Explore rate: 0.690370
Learning rate: 0.500000
Streaks: 0


Episode = 50
t = 10
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 14.628208
Explore rate: 0.690370
Learning rate: 0.500000
Streaks: 0

Episode 50 finished after 10.000000 time steps

Episode = 51
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.711037
Explore rate: 0.690370
Learning rate: 0.500000
Streaks: 0


Episode = 51
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 35.084477
Explore rate: 0.681937
Learning rate: 0.500000
Streaks: 0


Episode = 51
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.540332
Explore rate: 0.681937
Learning rate: 0.500000
Streaks: 0


Episode = 51
t = 3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 33.194635
Explore rate: 0.681937
Learning rate: 0.500000
Streaks: 0


Episode = 51
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 33.194635
Explore rate: 0.6


Episode = 53
t = 10
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.193667
Explore rate: 0.665546
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 35.219948
Explore rate: 0.665546
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 35.219948
Explore rate: 0.665546
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 13
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 35.219948
Explore rate: 0.665546
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 14
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 35.543848
Explore rate: 0.665546
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 35.866129
Explore rate: 0.665546
Learning rate: 0.500000
Streaks: 0


Episode = 53
t = 16
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 35.866129
Explore rate: 0.665546
Learning rate: 0.5000


Episode = 56
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 38.858577
Explore rate: 0.642065
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.546811
Explore rate: 0.642065
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 10
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.546811
Explore rate: 0.642065
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 11
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.546811
Explore rate: 0.642065
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 12
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.546811
Explore rate: 0.642065
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 13
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 29.899077
Explore rate: 0.642065
Learning rate: 0.500000
Streaks: 0


Episode = 56
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 35.865451
Explore rate: 0.642065
Learning rate: 0.500000


Episode = 58
t = 10
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 36.624703
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 58
t = 11
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.438028
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 58
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 37.279140
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 58
t = 13
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 37.279140
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 58
t = 14
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 37.855230
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 58
t = 15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.438028
Explore rate: 0.627088
Learning rate: 0.500000
Streaks: 0


Episode = 58
t = 16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 37.438028
Explore rate: 0.627088
Learning rate: 0.5000

Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 39.958247
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 8
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 39.958247
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 37.271221
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 39.958247
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 39.958247
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 12
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 40.258456
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode = 59
t = 13
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 38.755843
Explore rate: 0.619789
Learning rate: 0.500000
Streaks: 0


Episode



Episode = 62
t = 11
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 41.085426
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 12
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 41.379999
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 13
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 41.285142
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 14
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 41.772695
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 41.820055
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 16
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 16.322881
Explore rate: 0.598599
Learning rate: 0.500000
Streaks: 0


Episode = 62
t = 17
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 16.322881
Explore rate: 0.598599
Learning rate: 0.500

Streaks: 0


Episode = 65
t = 15
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 39.301342
Explore rate: 0.578396
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 16
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 40.232974
Explore rate: 0.578396
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 40.065993
Explore rate: 0.578396
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 18
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 40.065993
Explore rate: 0.578396
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 19
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 40.065993
Explore rate: 0.578396
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 20
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 40.365663
Explore rate: 0.578396
Learning rate: 0.500000
Streaks: 0


Episode = 65
t = 21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 40.397336
Explore rate: 0.578396
Learning 


Episode = 66
t = 1
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 42.029933
Explore rate: 0.571865
Learning rate: 0.500000
Streaks: 0


Episode = 66
t = 2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 42.319783
Explore rate: 0.571865
Learning rate: 0.500000
Streaks: 0


Episode = 66
t = 3
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 42.608184
Explore rate: 0.571865
Learning rate: 0.500000
Streaks: 0


Episode = 66
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 42.608184
Explore rate: 0.571865
Learning rate: 0.500000
Streaks: 0


Episode = 66
t = 5
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 42.895143
Explore rate: 0.571865
Learning rate: 0.500000
Streaks: 0


Episode = 66
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 42.895143
Explore rate: 0.571865
Learning rate: 0.500000
Streaks: 0


Episode = 66
t = 7
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 43.039144
Explore rate: 0.571865
Learning rate: 0.500000
Stre


Episode = 67
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 45.679458
Explore rate: 0.565431
Learning rate: 0.500000
Streaks: 0


Episode = 67
t = 15
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 45.512151
Explore rate: 0.565431
Learning rate: 0.500000
Streaks: 0


Episode = 67
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 45.868244
Explore rate: 0.565431
Learning rate: 0.500000
Streaks: 0


Episode = 67
t = 17
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 45.960856
Explore rate: 0.565431
Learning rate: 0.500000
Streaks: 0


Episode = 67
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 46.184746
Explore rate: 0.565431
Learning rate: 0.500000
Streaks: 0


Episode = 67
t = 19
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 46.341877
Explore rate: 0.565431
Learning rate: 0.500000
Streaks: 0


Episode = 67
t = 20
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 46.341877
Explore rate: 0.565431
Learning rate: 0.5000


Episode = 69
t = 25
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 47.845034
Explore rate: 0.552842
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 26
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 18.802024
Explore rate: 0.552842
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 27
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 18.802024
Explore rate: 0.552842
Learning rate: 0.500000
Streaks: 0


Episode = 69
t = 28
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 19.208014
Explore rate: 0.552842
Learning rate: 0.500000
Streaks: 0

Episode 69 finished after 28.000000 time steps

Episode = 70
t = 0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 47.126429
Explore rate: 0.552842
Learning rate: 0.500000
Streaks: 0


Episode = 70
t = 1
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 47.845034
Explore rate: 0.546682
Learning rate: 0.500000
Streaks: 0


Episode = 70
t = 2
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 47.845034



Episode = 73
t = 11
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 47.861851
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 73
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 47.861851
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 73
t = 13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 47.544830
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 73
t = 14
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 48.095962
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 73
t = 15
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 48.095962
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 73
t = 16
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 48.095962
Explore rate: 0.528708
Learning rate: 0.500000
Streaks: 0


Episode = 73
t = 17
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 42.964300
Explore rate: 0.528708
Learning rate: 0.5000

Explore rate: 0.522879
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 38
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.119927
Explore rate: 0.522879
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.119927
Explore rate: 0.522879
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 40
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 47.394591
Explore rate: 0.522879
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 41
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.149986
Explore rate: 0.522879
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 42
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 48.560668
Explore rate: 0.522879
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 49.966558
Explore rate: 0.522879
Learning rate: 0.500000
Streaks: 0


Episode = 74
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Bes


Episode = 76
t = 12
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 48.838169
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 48.690663
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 14
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.020962
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 15
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.020962
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 16
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.020962
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 17
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 47.687624
Explore rate: 0.511449
Learning rate: 0.500000
Streaks: 0


Episode = 76
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 49.020962
Explore rate: 0.511449
Learning rate: 0.5000


Episode = 77
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 50.915458
Explore rate: 0.505845
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 27
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 50.983500
Explore rate: 0.505845
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 28
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 50.105282
Explore rate: 0.505845
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 50.983500
Explore rate: 0.505845
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 50.983500
Explore rate: 0.505845
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 51.228582
Explore rate: 0.505845
Learning rate: 0.500000
Streaks: 0


Episode = 77
t = 32
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 49.847149
Explore rate: 0.505845
Learning rate: 0.5000


Episode = 79
t = 12
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 51.683879
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 79
t = 13
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 51.922971
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 79
t = 14
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 52.160880
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 79
t = 15
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 52.397612
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 79
t = 16
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 21.601678
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0


Episode = 79
t = 17
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 21.601678
Explore rate: 0.494850
Learning rate: 0.494850
Streaks: 0

Episode 79 finished after 17.000000 time steps

Episode = 80
t = 0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 52.27271


Episode = 81
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 52.745737
Explore rate: 0.489455
Learning rate: 0.489455
Streaks: 0


Episode = 81
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 50.718196
Explore rate: 0.484126
Learning rate: 0.484126
Streaks: 0


Episode = 81
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 52.122053
Explore rate: 0.484126
Learning rate: 0.484126
Streaks: 0


Episode = 81
t = 3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 54.118335
Explore rate: 0.484126
Learning rate: 0.484126
Streaks: 0


Episode = 81
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 53.794600
Explore rate: 0.484126
Learning rate: 0.484126
Streaks: 0


Episode = 81
t = 5
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 53.794600
Explore rate: 0.484126
Learning rate: 0.484126
Streaks: 0


Episode = 81
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 54.018293
Explore rate: 0.484126
Learning rate: 0.484126
Stre

t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 55.630322
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 3
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 55.630322
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 55.840484
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 5
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 56.049650
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 56.049650
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.066018
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 55.614694
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episo


Episode = 83
t = 59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 56.361954
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 60
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 56.361954
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 61
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 56.361954
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 62
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 56.361954
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 63
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 56.568650
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 56.202893
Explore rate: 0.473661
Learning rate: 0.473661
Streaks: 0


Episode = 83
t = 65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 56.202893
Explore rate: 0.473661
Learning rate: 0.4736


Episode = 84
t = 20
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 58.506943
Explore rate: 0.468521
Learning rate: 0.468521
Streaks: 0


Episode = 84
t = 21
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.794739
Explore rate: 0.468521
Learning rate: 0.468521
Streaks: 0


Episode = 84
t = 22
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.794739
Explore rate: 0.468521
Learning rate: 0.468521
Streaks: 0


Episode = 84
t = 23
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.992480
Explore rate: 0.468521
Learning rate: 0.468521
Streaks: 0


Episode = 84
t = 24
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.992480
Explore rate: 0.468521
Learning rate: 0.468521
Streaks: 0


Episode = 84
t = 25
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 58.506943
Explore rate: 0.468521
Learning rate: 0.468521
Streaks: 0


Episode = 84
t = 26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 58.506943
Explore rate: 0.468521
Learning rate: 0.4685


Episode = 86
t = 8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 58.247070
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 86
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.021607
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 86
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.021607
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 86
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.236966
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 86
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.451337
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 86
t = 13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.664726
Explore rate: 0.458421
Learning rate: 0.458421
Streaks: 0


Episode = 86
t = 14
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 53.877137
Explore rate: 0.458421
Learning rate: 0.458421

Streaks: 0


Episode = 88
t = 34
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 59.982561
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 88
t = 35
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 59.982561
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 88
t = 36
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 55.969784
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 88
t = 37
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 58.849673
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 88
t = 38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 58.849673
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 88
t = 39
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 59.034252
Explore rate: 0.448550
Learning rate: 0.448550
Streaks: 0


Episode = 88
t = 40
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 54.333872
Explore rate: 0.448550
Learning 


Episode = 90
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 58.560603
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 90
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.005550
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 90
t = 5
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 58.304257
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 90
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 58.304257
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 90
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.994245
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 90
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.994245
Explore rate: 0.438899
Learning rate: 0.438899
Streaks: 0


Episode = 90
t = 9
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.994245
Explore rate: 0.438899
Learning rate: 0.438899
Stre

t = 26
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 60.885424
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 91
t = 27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.952890
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 91
t = 28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 56.952890
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 91
t = 29
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 57.139779
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 91
t = 30
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 57.325858
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 91
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 57.325858
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0


Episode = 91
t = 32
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 56.991468
Explore rate: 0.434152
Learning rate: 0.434152
Streaks: 0



Episode = 93
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.073352
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 93
t = 11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.242965
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 93
t = 12
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 60.905882
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 93
t = 13
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.787802
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 93
t = 14
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 60.905882
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 93
t = 15
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 60.905882
Explore rate: 0.424812
Learning rate: 0.424812
Streaks: 0


Episode = 93
t = 16
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 61.071958
Explore rate: 0.424812
Learning rate: 0.4248


Episode = 94
t = 50
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.667136
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 94
t = 51
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.845026
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 94
t = 52
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 24.436947
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 94
t = 53
Action: 1
State: (0, 0, 4, 1)
Reward: 1.000000
Best Q: 20.868310
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 94
t = 54
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 24.436947
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0


Episode = 94
t = 55
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 24.436947
Explore rate: 0.420216
Learning rate: 0.420216
Streaks: 0

Episode 94 finished after 55.000000 time steps

Episode = 95
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.56562


Episode = 96
t = 31
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 61.433562
Explore rate: 0.411168
Learning rate: 0.411168
Streaks: 0


Episode = 96
t = 32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.603700
Explore rate: 0.411168
Learning rate: 0.411168
Streaks: 0


Episode = 96
t = 33
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 61.661391
Explore rate: 0.411168
Learning rate: 0.411168
Streaks: 0


Episode = 96
t = 34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.785057
Explore rate: 0.411168
Learning rate: 0.411168
Streaks: 0


Episode = 96
t = 35
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 61.869366
Explore rate: 0.411168
Learning rate: 0.411168
Streaks: 0


Episode = 96
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.976503
Explore rate: 0.411168
Learning rate: 0.411168
Streaks: 0


Episode = 96
t = 37
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 62.069758
Explore rate: 0.411168
Learning rate: 0.4111

Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 25
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.795826
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 26
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.784758
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 27
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.784758
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 28
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.784758
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 29
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.084092
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 30
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 60.762019
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Bes


Episode = 98
t = 88
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.839009
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 89
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 58.008625
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.936606
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 91
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.936606
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.089736
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 93
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.242251
Explore rate: 0.402305
Learning rate: 0.402305
Streaks: 0


Episode = 98
t = 94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.242251
Explore rate: 0.402305
Learning rate: 0.4023


Episode = 99
t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 59.502835
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 99
t = 4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 62.415997
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 99
t = 5
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 62.415997
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 99
t = 6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 62.565559
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 99
t = 7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 62.833229
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 99
t = 8
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 62.833229
Explore rate: 0.397940
Learning rate: 0.397940
Streaks: 0


Episode = 99
t = 9
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 62.833229
Explore rate: 0.397940
Learning rate: 0.397940
Stre


Episode = 101
t = 23
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 55.724450
Explore rate: 0.389340
Learning rate: 0.389340
Streaks: 0


Episode = 101
t = 24
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 55.896833
Explore rate: 0.389340
Learning rate: 0.389340
Streaks: 0


Episode = 101
t = 25
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 56.068544
Explore rate: 0.389340
Learning rate: 0.389340
Streaks: 0


Episode = 101
t = 26
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 56.239586
Explore rate: 0.389340
Learning rate: 0.389340
Streaks: 0


Episode = 101
t = 27
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 25.649202
Explore rate: 0.389340
Learning rate: 0.389340
Streaks: 0


Episode = 101
t = 28
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 25.649202
Explore rate: 0.389340
Learning rate: 0.389340
Streaks: 0

Episode 101 finished after 28.000000 time steps

Episode = 102
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 

Episode = 103
t = 47
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 62.655586
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 103
t = 48
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 62.726196
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 103
t = 49
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.439640
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 103
t = 50
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.439640
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 103
t = 51
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.534275
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 103
t = 52
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 59.623294
Explore rate: 0.380907
Learning rate: 0.380907
Streaks: 0


Episode = 103
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.579261
Explore rate: 0.380907
Learning rate: 


Episode = 105
t = 1
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.167947
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 105
t = 2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.167947
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 105
t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 63.434090
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 105
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.211006
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 105
t = 5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.361357
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 105
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.361357
Explore rate: 0.372634
Learning rate: 0.372634
Streaks: 0


Episode = 105
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.520243
Explore rate: 0.372634
Learning rate: 0.3726


Episode = 108
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.195652
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 108
t = 8
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.371599
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 108
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.371599
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 108
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.546911
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 108
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.721591
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 108
t = 12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 51.895641
Explore rate: 0.360514
Learning rate: 0.360514
Streaks: 0


Episode = 108
t = 13
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 35.304220
Explore rate: 0.360514
Learning rate: 0.

Episode = 111
t = 6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.018027
Explore rate: 0.348722
Learning rate: 0.348722
Streaks: 0


Episode = 111
t = 7
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.167915
Explore rate: 0.348722
Learning rate: 0.348722
Streaks: 0


Episode = 111
t = 8
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 26.460064
Explore rate: 0.348722
Learning rate: 0.348722
Streaks: 0


Episode = 111
t = 9
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 26.460064
Explore rate: 0.348722
Learning rate: 0.348722
Streaks: 0

Episode 111 finished after 9.000000 time steps

Episode = 112
t = 0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.635476
Explore rate: 0.348722
Learning rate: 0.348722
Streaks: 0


Episode = 112
t = 1
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.762287
Explore rate: 0.344862
Learning rate: 0.344862
Streaks: 0


Episode = 112
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.76228


Episode = 113
t = 43
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.307594
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 113
t = 44
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.307594
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 113
t = 45
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.453190
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 113
t = 46
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.453190
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 113
t = 47
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.453190
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 113
t = 48
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 57.453190
Explore rate: 0.341035
Learning rate: 0.341035
Streaks: 0


Episode = 113
t = 49
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 26.969241
Explore rate: 0.341035
Learning rate:


Episode = 115
t = 28
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 60.274673
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.475803
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.475803
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.475803
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.475803
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 33
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 59.075921
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 34
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.475803
Explore rate: 0.333482
Learning rate:

Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 95
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.892671
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 96
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 61.682048
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.892671
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.892671
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.019752
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 100
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 61.879355
Explore rate: 0.333482
Learning rate: 0.333482
Streaks: 0


Episode = 115
t = 101
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0


Episode = 116
t = 29
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 54.138857
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 59.791948
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 31
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 56.135575
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 32
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 45.399053
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 33
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 38.322563
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 34
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 45.399053
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 35
Action: 0
State: (0, 0, 1, 1)
Reward: 1.000000
Best Q: 45.399053
Explore rate: 0.329754
Learning rate:


Episode = 116
t = 87
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 54.485531
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 88
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 54.635617
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 89
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 54.635617
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 90
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 40.836114
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 116
t = 91
Action: 1
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 40.836114
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0

Episode 116 finished after 91.000000 time steps

Episode = 117
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 61.787311
Explore rate: 0.329754
Learning rate: 0.329754
Streaks: 0


Episode = 117
t = 1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 6


Episode = 119
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.309651
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 3
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 47.605835
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 61.062142
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 52.019268
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 61.062142
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 61.062142
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 61.186260
Explore rate: 0.318759
Learning rate: 0.3187

Streaks: 0


Episode = 119
t = 59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 60.942243
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 60
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 60.942243
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 61
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 59.179177
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 59.179177
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 63
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 59.309297
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 59.309297
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 65
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 60.637044
Explore rate: 0.318759
Le


Episode = 119
t = 116
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 61.642032
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.555147
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.555147
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 119
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.680881
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 120
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.680881
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 121
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 60.806214
Explore rate: 0.318759
Learning rate: 0.318759
Streaks: 0


Episode = 119
t = 122
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 61.239107
Explore rate: 0.318759
Learnin


Episode = 120
t = 6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.228765
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 120
t = 7
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.228765
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 120
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.363560
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 120
t = 9
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.363560
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 120
t = 10
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.497931
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 120
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 57.631878
Explore rate: 0.315155
Learning rate: 0.315155
Streaks: 0


Episode = 120
t = 12
Action: 0
State: (0, 0, 1, 0)
Reward: 1.000000
Best Q: 40.836114
Explore rate: 0.315155
Learning rate: 0.3


Episode = 122
t = 26
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 62.173953
Explore rate: 0.308035
Learning rate: 0.308035
Streaks: 0


Episode = 122
t = 27
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.447895
Explore rate: 0.308035
Learning rate: 0.308035
Streaks: 0


Episode = 122
t = 28
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 62.374010
Explore rate: 0.308035
Learning rate: 0.308035
Streaks: 0


Episode = 122
t = 29
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.541037
Explore rate: 0.308035
Learning rate: 0.308035
Streaks: 0


Episode = 122
t = 30
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 62.540847
Explore rate: 0.308035
Learning rate: 0.308035
Streaks: 0


Episode = 122
t = 31
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 62.656366
Explore rate: 0.308035
Learning rate: 0.308035
Streaks: 0


Episode = 122
t = 32
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 62.691462
Explore rate: 0.308035
Learning rate:

Episode = 124
t = 5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 60.348970
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 124
t = 6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 61.378491
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 124
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 58.507999
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 124
t = 8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 58.507999
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 124
t = 9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 58.632903
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 124
t = 10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 58.757430
Explore rate: 0.301030
Learning rate: 0.301030
Streaks: 0


Episode = 124
t = 11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 58.881583
Explore rate: 0.301030
Learning rate: 0.301


Episode = 125
t = 55
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 61.845730
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 125
t = 56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.375201
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 125
t = 57
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.375201
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 125
t = 58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 60.493112
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 125
t = 59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 62.133470
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 125
t = 60
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 61.176490
Explore rate: 0.297569
Learning rate: 0.297569
Streaks: 0


Episode = 125
t = 61
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 61.964229
Explore rate: 0.297569
Learning rate:


Episode = 126
t = 10
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 59.280087
Explore rate: 0.294136
Learning rate: 0.294136
Streaks: 0


Episode = 126
t = 11
Action: 0
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 28.810566
Explore rate: 0.294136
Learning rate: 0.294136
Streaks: 0


Episode = 126
t = 12
Action: 1
State: (0, 0, 4, 2)
Reward: 1.000000
Best Q: 28.810566
Explore rate: 0.294136
Learning rate: 0.294136
Streaks: 0

Episode 126 finished after 12.000000 time steps

Episode = 127
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.741256
Explore rate: 0.294136
Learning rate: 0.294136
Streaks: 0


Episode = 127
t = 1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.847906
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.847906
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 63.


Episode = 127
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.706560
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.812076
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 60
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 58.725516
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.768647
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.768647
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 63.873982
Explore rate: 0.290730
Learning rate: 0.290730
Streaks: 0


Episode = 127
t = 64
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 63.757529
Explore rate: 0.290730
Learning rate:


Episode = 129
t = 15
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 63.816854
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 16
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 64.319700
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 64.060991
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.090044
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.090044
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.194867
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 63.370750
Explore rate: 0.283997
Learning rate:


Episode = 129
t = 74
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.830741
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 75
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.830741
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.830741
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 77
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 63.933461
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 78
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 63.928203
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 79
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 64.107749
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 80
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 64.081126
Explore rate: 0.283997
Learning rate:


Episode = 129
t = 132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.512590
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 133
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 65.339250
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 134
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 65.631925
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 135
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 65.519973
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 64.583423
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 137
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 63.500249
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 64.379463
Explore rate: 0.283997
Learnin

Episode = 129
t = 189
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.383445
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 190
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 64.088993
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 65.673831
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 65.673831
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 65.771316
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 194
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 65.477371
Explore rate: 0.283997
Learning rate: 0.283997
Streaks: 0


Episode = 129
t = 195
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 65.477371
Explore rate: 0.283997
Learning


Episode = 130
t = 49
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.810551
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 66.810551
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 51
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 65.696584
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.947946
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 66.947946
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 54
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 67.040712
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 67.040712
Explore rate: 0.280669
Learning rate:


Episode = 130
t = 108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 67.835315
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 109
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 67.450041
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.504362
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.504362
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.595568
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 113
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 67.529644
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 114
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 67.733622
Explore rate: 0.280669
Learnin

Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 68.566826
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 68.566826
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 68.655049
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 172
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 67.872620
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.291028
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 68.291028
Explore rate: 0.280669
Learning rate: 0.280669
Streaks: 1


Episode = 130
t = 175
Action: 0
State: (0, 0, 2, 1)
Reward


Episode = 131
t = 26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 69.351863
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 27
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 69.010059
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 69.033066
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 69.033066
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 69.118958
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 31
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 69.204611
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 32
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 69.404443
Explore rate: 0.277366
Learning rate:

Reward: 1.000000
Best Q: 69.833762
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 69.917432
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 87
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 69.496941
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.033211
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.033211
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.116328
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 91
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 69.955826
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 92
Action: 


Episode = 131
t = 142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 70.413349
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 143
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 70.632867
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 144
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 70.632867
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 145
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 69.574796
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.803139
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 147
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 69.996479
Explore rate: 0.277366
Learning rate: 0.277366
Streaks: 2


Episode = 131
t = 148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 70.803139
Explore rate: 0.277366
Learnin


Episode = 132
t = 2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.244513
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.244513
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 71.323328
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 71.143451
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 71.195447
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 71.195447
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 71.274397
Explore rate: 0.274088
Learning rate: 0.2740


Episode = 132
t = 62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.094202
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.094202
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.170689
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 65
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 72.001512
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 66
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.256430
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.256430
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 68
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 72.147424
Explore rate: 0.274088
Learning rate:

Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 120
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.746648
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 121
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 72.650662
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 122
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 72.795300
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 123
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 71.692425
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.584052
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 125
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.584052
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 72.6


Episode = 132
t = 178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 72.998520
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 179
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 73.119888
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 180
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 73.105461
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 73.283415
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 182
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 72.650007
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 73.251804
Explore rate: 0.274088
Learning rate: 0.274088
Streaks: 3


Episode = 132
t = 184
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 73.251804
Explore rate: 0.274088
Learnin


Episode = 133
t = 38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 73.749118
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 39
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 73.320434
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.040400
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.040400
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.110707
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.180825
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 44
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 73.986094
Explore rate: 0.270835
Learning rate:


Episode = 133
t = 97
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 74.543525
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 98
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 74.556072
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.437312
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 100
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 73.961610
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.437312
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.437312
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 74.506545
Explore rate: 0.270835
Learning r


Episode = 133
t = 158
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.386215
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.452878
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 160
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.519361
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 161
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.519361
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.519361
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 163
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 74.483508
Explore rate: 0.270835
Learning rate: 0.270835
Streaks: 4


Episode = 133
t = 164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.585663
Explore rate: 0.270835
Learnin


Episode = 134
t = 18
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 75.351110
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 19
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 75.351110
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.690514
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.690514
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.755568
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 23
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 75.684329
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 24
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 75.849092
Explore rate: 0.267606
Learning rate:


Episode = 134
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.199816
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 77
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 76.000454
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 78
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 76.000454
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 79
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.199816
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.199816
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.263507
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 82
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 76.271652
Explore rate: 0.267606
Learning rate:


Episode = 134
t = 132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.767273
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.767273
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.829445
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 135
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 76.751063
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 136
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 76.916098
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 137
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 76.857001
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 76.766810
Explore rate: 0.267606
Learnin


Episode = 134
t = 191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.532542
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.592667
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 193
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.356944
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 194
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.634097
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 195
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.490964
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 196
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.490964
Explore rate: 0.267606
Learning rate: 0.267606
Streaks: 5


Episode = 134
t = 197
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 76.133989
Explore rate: 0.267606
Learnin


Episode = 135
t = 55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 77.975660
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 56
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 77.908675
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.953950
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 77.953950
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.012240
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 60
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 77.859281
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 61
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.073102
Explore rate: 0.264401
Learning rate:

Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.611080
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 113
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 78.342421
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.322443
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.322443
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 116
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 78.379759
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 117
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 78.314785
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.6


Episode = 135
t = 170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.080745
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.080745
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 172
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 77.867618
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.842597
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 174
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 78.181344
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.842597
Explore rate: 0.264401
Learning rate: 0.264401
Streaks: 6


Episode = 135
t = 176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 78.842597
Explore rate: 0.264401
Learnin


Episode = 136
t = 27
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.217856
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 28
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.217856
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 29
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.217856
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 30
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.387868
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 31
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.356216
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 32
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.356216
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 33
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 79.356216
Explore rate: 0.261219
Learning rate:



Episode = 136
t = 84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.684764
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.684764
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 79.737831
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 87
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 79.312192
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 88
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.899656
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 89
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.899656
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 79.952162
Explore rate: 0.261219
Learning rate

Best Q: 80.136729
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 150
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.930011
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 151
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.930011
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 152
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.930011
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 153
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.930011
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 154
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 79.982437
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 155
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 80.034727
Explore rate: 0.261219
Learning rate: 0.261219
Streaks: 7


Episode = 136
t = 156
Action: 0
State: (

Streaks: 8


Episode = 137
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.514765
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.514765
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 80.565048
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 9
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 80.470547
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.861255
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.861255
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 80.910644
Explore rate: 0.258061
Learni


Episode = 137
t = 72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.185110
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 73
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.233664
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.233664
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.282093
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 76
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 81.131836
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.215780
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 81.215780
Explore rate: 0.258061
Learning rate:

Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.614478
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.614478
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.661924
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 140
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 81.619194
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 141
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 81.733442
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 142
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 81.354663
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 143
Action: 1
State: (0, 0, 3, 1)
Reward


Episode = 137
t = 193
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.050071
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 194
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.050071
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 195
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.050071
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.050071
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.096393
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 198
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 81.972189
Explore rate: 0.258061
Learning rate: 0.258061
Streaks: 8


Episode = 137
t = 199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.214869
Explore rate: 0.258061
Learnin

Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 50
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 81.685151
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 51
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 81.685151
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 52
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 81.731840
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.669234
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.669234
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 55
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.713415
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000

t = 116
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 82.733991
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.982581
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 82.982581
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 119
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.025963
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 120
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 82.788973
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.796268
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 82.796268
Explore rate: 0.254925
Learning rate: 0.25492


Episode = 138
t = 175
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 83.366873
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.199660
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.199660
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.242488
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 179
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 83.095395
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 180
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 82.615829
Explore rate: 0.254925
Learning rate: 0.254925
Streaks: 9


Episode = 138
t = 181
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 82.615829
Explore rate: 0.254925
Learnin


Episode = 139
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.478614
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.478614
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.520217
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 83.388955
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 44
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 83.394552
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 45
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 83.394552
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 83.725003
Explore rate: 0.251812
Learning


Episode = 139
t = 98
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 83.720996
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.044996
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.044996
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.085173
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 102
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.003700
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.846357
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 83.846357
Explore rate: 0.251812
Lea


Episode = 139
t = 154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.417633
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 155
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.417633
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 156
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.456871
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 157
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.496011
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.496011
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 159
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.215232
Explore rate: 0.251812
Learning rate: 0.251812
Streaks: 10


Episode = 139
t = 160
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.494265
Explore rate: 0.251812
L


Episode = 140
t = 11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.538743
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 84.577198
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 84.560129
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.704252
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.704252
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 84.742296
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 84.763810
Explore rate: 0.248721
Learning


Episode = 140
t = 68
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.106650
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 69
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.082133
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.083601
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 71
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.083601
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.120701
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 73
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 84.993470
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.137656
Explore rate: 0.248721
Learning


Episode = 140
t = 127
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.359203
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.276891
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 129
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 85.094683
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.276891
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 131
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.276891
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.313511
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 133
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 85.283397
Explore rate: 0.248721
L


Episode = 140
t = 186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.791457
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 187
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.641493
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 188
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.816420
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 189
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.720278
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 190
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 85.828024
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 191
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 85.782326
Explore rate: 0.248721
Learning rate: 0.248721
Streaks: 11


Episode = 140
t = 192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.654251
Explore rate: 0.248721
L


Episode = 141
t = 42
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.222158
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 86.071386
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.884271
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.884271
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 85.918946
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 85.950907
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.219337
Explore rate: 0.245652
Learning


Episode = 141
t = 109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.328530
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.362114
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 111
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.395615
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 112
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.483000
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 113
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.483000
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.516205
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 115
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 86.343647
Explore rate: 0.245652
L


Episode = 141
t = 168
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.689156
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.689156
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.721855
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.743698
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 172
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 86.743698
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 173
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 86.702609
Explore rate: 0.245652
Learning rate: 0.245652
Streaks: 12


Episode = 141
t = 174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.754473
Explore rate: 0.245652
L


Episode = 142
t = 24
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.907482
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 25
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.939199
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 26
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.946862
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 27
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 86.972726
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 28
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 86.984742
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 29
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 87.007216
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 30
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 87.021715
Explore rate: 0.242604
Learning


Episode = 142
t = 81
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 86.783529
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.185906
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.185906
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.216994
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.248006
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 86
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 87.203986
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 87
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.291801
Explore rate: 0.242604
Learning


Episode = 142
t = 147
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.586957
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 148
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.586957
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 149
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.617071
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 150
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 87.495918
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 151
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 87.495918
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 152
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 87.294688
Explore rate: 0.242604
Learning rate: 0.242604
Streaks: 13


Episode = 142
t = 153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 87.450556
Explore rate: 0.242604
L

t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 87.764999
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 87.810144
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 87.805019
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 87.877457
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 87.877457
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 87.906500
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 87.363895
Explore rate: 0.239578
Learning rate: 0.239578
Streak


Episode = 143
t = 63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 88.189622
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 64
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 88.100074
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.033515
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.033515
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 67
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.062184
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.090785
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 69
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 88.022715
Explore rate: 0.239578
Learning

Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 122
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 88.352661
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 88.352661
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 124
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 88.003469
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 125
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.369936
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.369936
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.397799
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 128
Action: 0
State: (0, 0, 2, 1)


Episode = 143
t = 180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.537363
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 181
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 88.585891
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 182
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.597218
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 183
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 88.615923
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 184
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.628973
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 185
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 88.646292
Explore rate: 0.239578
Learning rate: 0.239578
Streaks: 14


Episode = 143
t = 186
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.660323
Explore rate: 0.239578
L


Episode = 144
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.829476
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.829476
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 42
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.855903
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.855903
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 44
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.882266
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 45
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 88.650695
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 88.882266
Explore rate: 0.236572
Learning


Episode = 144
t = 99
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 88.922208
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 100
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 88.866632
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 101
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.075781
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.075781
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.101624
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 104
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 89.043768
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 105
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.133540
Explore rate: 0.236572
Le


Episode = 144
t = 159
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 88.985318
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.246445
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.246445
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.271885
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 163
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 89.108472
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 164
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.278368
Explore rate: 0.236572
Learning rate: 0.236572
Streaks: 15


Episode = 144
t = 165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.278368
Explore rate: 0.236572
L

Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.434109
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.458790
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 19
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 89.435240
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 20
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.496838
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.546437
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 22
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.496838
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 23
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.496838
Explore rate: 0.233587
Learning rate: 0.233587
Streak


Episode = 145
t = 78
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 89.542581
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 79
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.670657
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.670657
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.694785
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 82
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.718857
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.718857
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 84
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 89.699382
Explore rate: 0.233587
Learning


Episode = 145
t = 137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 89.986438
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.009829
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 139
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 89.944544
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.926756
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 141
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 89.604769
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.926756
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 143
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 89.926756
Explore rate: 0.233587
L


Episode = 145
t = 195
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 90.040977
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.235655
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.235655
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.258463
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16


Episode = 145
t = 199
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 90.187056
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 16

Episode 145 finished after 199.000000 time steps

Episode = 146
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.130779
Explore rate: 0.233587
Learning rate: 0.233587
Streaks: 17


Episode = 146
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0000


Episode = 146
t = 54
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.447364
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.447364
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 56
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.469394
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.469394
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 58
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 90.424159
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 59
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.497957
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 60
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 90.048115
Explore rate: 0.230623
Learning


Episode = 146
t = 113
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.497282
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.497282
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.519197
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 116
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 90.445217
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 117
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.540993
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 118
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 90.489120
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 119
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 90.489120
Explore rate: 0.230623
L


Episode = 146
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.812255
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 174
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 90.605530
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.750848
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 176
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 90.473583
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.750848
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.750848
Explore rate: 0.230623
Learning rate: 0.230623
Streaks: 17


Episode = 146
t = 179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 90.772179
Explore rate: 0.230623
L


Episode = 147
t = 39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 90.921410
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 40
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.982000
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 41
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 90.955737
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 42
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 90.996612
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 90.985542
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 44
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.014616
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 45
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 91.012619
Explore rate: 0.227678
Learning


Episode = 147
t = 99
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.088581
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 100
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.095990
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 101
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.110541
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 102
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.119542
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.175004
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.175004
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.195097
Explore rate: 0.227678
Le

Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.274160
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.294027
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 160
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.273599
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.286814
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.286814
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.306652
Explore rate: 0.227678
Learning rate: 0.227678
Streaks: 18


Episode = 147
t = 164
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best 

Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 91.235635
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.376606
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.376606
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 18
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.395987
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.395987
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.415325
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 91.397620
Explore rate: 0.224754
Learning rate: 0.224754
Streak


Episode = 148
t = 74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.690710
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.690710
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.709385
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.728019
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 78
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.648472
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 79
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.743324
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 80
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.688347
Explore rate: 0.224754
Learning

t = 131
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.889387
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 132
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.839111
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 133
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.896429
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 134
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.870206
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 135
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 91.908808
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 136
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 91.426814
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 91.761055
Explore rate: 0.224754
Learning rate: 0


Episode = 148
t = 189
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 91.862028
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 190
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 91.862028
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.027250
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.027250
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.045169
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 194
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 91.955261
Explore rate: 0.224754
Learning rate: 0.224754
Streaks: 19


Episode = 148
t = 195
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.056903
Explore rate: 0.224754
L


Episode = 149
t = 48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.117436
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 49
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 91.937666
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.117436
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 51
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.117436
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.134924
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.152372
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 54
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 92.082505
Explore rate: 0.221849
Learning


Episode = 149
t = 108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.242727
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 109
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.259936
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.259936
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 111
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 92.173388
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 112
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.271461
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 113
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.271461
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 114
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.288606
Explore rate: 0.221849
L


Episode = 149
t = 168
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.434611
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.434611
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 170
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 92.408143
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.344415
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.344415
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.361399
Explore rate: 0.221849
Learning rate: 0.221849
Streaks: 20


Episode = 149
t = 174
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 92.294072
Explore rate: 0.221849
L


Episode = 150
t = 28
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 92.344037
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 29
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 92.344037
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 30
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 92.360801
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.597814
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.597814
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.614022
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 34
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 92.532042
Explore rate: 0.218963
Learning


Episode = 150
t = 87
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 92.577876
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 88
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.635468
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.635468
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 90
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 92.606612
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.681830
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.681830
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 93
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.697854
Explore rate: 0.218963
Learning


Episode = 150
t = 147
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.863888
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 148
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 92.836720
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 149
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.782203
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.782203
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.798008
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 152
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 92.749200
Explore rate: 0.218963
Learning rate: 0.218963
Streaks: 21


Episode = 150
t = 153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 92.873624
Explore rate: 0.218963
L


Episode = 151
t = 6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.946635
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 7
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 92.896867
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.068494
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.068494
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.083472
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 11
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 92.554998
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 92.951230
Explore rate: 0.216096
Learning rat


Episode = 151
t = 63
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.031075
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 64
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.023129
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 65
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.056240
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 66
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.045290
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 67
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.068902
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 68
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.065370
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 69
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.083125
Explore rate: 0.216096
Learning


Episode = 151
t = 122
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.178885
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 123
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 93.155257
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 124
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.211487
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 125
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.211487
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.226156
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 127
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 92.971733
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.188570
Explore rate: 0.216096
L


Episode = 151
t = 178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.342961
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.357346
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 180
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 93.288725
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.291095
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.291095
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.305593
Explore rate: 0.216096
Learning rate: 0.216096
Streaks: 22


Episode = 151
t = 184
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.295345
Explore rate: 0.216096
L


Episode = 152
t = 38
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.425193
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.407551
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.413130
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.413130
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.427176
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 93.409347
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.435489
Explore rate: 0.213249
Learning


Episode = 152
t = 106
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 93.529126
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.592556
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.592556
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.606220
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 110
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.518661
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.570268
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.570268
Explore rate: 0.213249
L

Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.703288
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 166
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.627960
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 167
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.711377
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 168
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.659159
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.708037
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.708037
Explore rate: 0.213249
Learning rate: 0.213249
Streaks: 23


Episode = 152
t = 171
Action: 0
State: (0, 0, 3, 1)


Episode = 153
t = 24
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.822976
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.785201
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.792502
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.792502
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.805564
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 29
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.818598
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.818598
Explore rate: 0.210419
Learning


Episode = 153
t = 80
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 93.906238
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 81
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 93.832953
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.925171
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.925171
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.937954
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 93.950710
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 86
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 93.942013
Explore rate: 0.210419
Learning


Episode = 153
t = 137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.016851
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 138
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 93.974095
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 139
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.030475
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 140
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 93.863764
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.066138
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.066138
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.078624
Explore rate: 0.210419
L

Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.081175
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.081175
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 198
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.093630
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24


Episode = 153
t = 199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.093630
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 24

Episode 153 finished after 199.000000 time steps

Episode = 154
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.196216
Explore rate: 0.210419
Learning rate: 0.210419
Streaks: 25


Episode = 154
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 94.165518
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episod


Episode = 154
t = 55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.243385
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.255336
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 57
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.267263
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 58
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 94.208553
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 59
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.276528
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 60
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 94.040087
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 61
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.205310
Explore rate: 0.207608
Learning


Episode = 154
t = 114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.349182
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 115
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.112833
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 116
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.337116
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 117
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.171152
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 118
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.171152
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 119
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.337116
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.337116
Explore rate: 0.207608
L


Episode = 154
t = 171
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 94.205743
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 172
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 94.205743
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.419969
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 174
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 94.271334
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.419969
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.419969
Explore rate: 0.207608
Learning rate: 0.207608
Streaks: 25


Episode = 154
t = 177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.431553
Explore rate: 0.207608
L


Episode = 155
t = 27
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 94.400178
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.547860
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.547860
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.559027
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 31
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 94.488186
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 32
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.564668
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 33
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.400178
Explore rate: 0.204815
Learning


Episode = 155
t = 86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.629344
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 87
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.629344
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 88
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.640344
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 89
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 94.596219
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 90
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.651104
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 91
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 94.618416
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 92
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.655431
Explore rate: 0.204815
Learning

Streaks: 26


Episode = 155
t = 144
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 94.654145
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 145
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.729034
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 146
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 94.680279
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 147
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.729944
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 148
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.729944
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.729944
Explore rate: 0.204815
Learning rate: 0.204815
Streaks: 26


Episode = 155
t = 150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.740738
Explore rat


Episode = 156
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.835504
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 94.769138
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.840990
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 5
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.840990
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.840990
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.667050
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.813689
Explore rate: 0.202040
Learning rate: 

Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 94.906103
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 60
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 94.930404
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 61
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.773497
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 62
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.773497
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 63
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 94.773497
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.902529
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 94.902529
Explore rate: 0.202040
Learning rate: 0.202040
Streak


Episode = 156
t = 121
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.006718
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.006718
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 123
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.016807
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 124
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 94.974292
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 125
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.026405
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 126
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 94.994870
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 127
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.030146
Explore rate: 0.202040
L


Episode = 156
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.080320
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.080320
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.090260
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 185
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 95.012880
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.104792
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.104792
Explore rate: 0.202040
Learning rate: 0.202040
Streaks: 27


Episode = 156
t = 188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.114682
Explore rate: 0.202040
L

Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.186015
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.186015
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.195608
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 47
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.205182
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 48
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.184591
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 49
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.218326
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 50
Action: 1
State: (0, 0, 3, 0)
Reward

Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.277683
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.277683
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 104
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.287094
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 105
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.260711
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 106
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.298801
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 107
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.277670
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 108
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.304001
Explore rate: 0.199283
Learning rate: 0.199283



Episode = 157
t = 161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.409665
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 162
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 95.200648
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.339836
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.339836
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.349122
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 166
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.336503
Explore rate: 0.199283
Learning rate: 0.199283
Streaks: 28


Episode = 157
t = 167
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.363323
Explore rate: 0.199283
L


Episode = 158
t = 20
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.458568
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.458568
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.467494
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 23
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.438726
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 24
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.477962
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.455326
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 26
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.455326
Explore rate: 0.196543
Learning


Episode = 158
t = 78
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.482629
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 79
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.482629
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 80
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.495627
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 81
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.495627
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.504480
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 83
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.492537
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.508422
Explore rate: 0.196543
Learning


Episode = 158
t = 136
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.613125
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.613125
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.621747
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 139
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.569181
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.599663
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.599663
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.608311
Explore rate: 0.196543
L


Episode = 158
t = 197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.720587
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 198
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 95.529889
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29


Episode = 158
t = 199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.680778
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 29

Episode 158 finished after 199.000000 time steps

Episode = 159
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.680778
Explore rate: 0.196543
Learning rate: 0.196543
Streaks: 30


Episode = 159
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.683284
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 2
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.683284
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 3
Action: 0
State: (0, 0, 3, 0)
Reward: 1.000000
B


Episode = 159
t = 56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.755855
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 57
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.767955
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 58
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.773035
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 59
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.773035
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 60
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.777132
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 61
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.745939
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.745939
Explore rate: 0.193820
Learning


Episode = 159
t = 117
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.758851
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 118
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.813119
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 119
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 95.777484
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.890402
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.890402
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.898368
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 123
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 95.881748
Explore rate: 0.193820
L


Episode = 159
t = 176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.963184
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 177
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.971008
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 178
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.971008
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.971008
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 180
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.978817
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.978817
Explore rate: 0.193820
Learning rate: 0.193820
Streaks: 30


Episode = 159
t = 182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 95.986611
Explore rate: 0.193820
L


Episode = 160
t = 35
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.030905
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.038491
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.002493
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 38
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.045375
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 39
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 95.801500
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.979846
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 95.979846
Explore rate: 0.191114
Learning


Episode = 160
t = 97
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 95.951448
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.071375
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.071375
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.078883
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 101
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.059483
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 102
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.088768
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 103
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 95.925769
Explore rate: 0.191114
Lear


Episode = 160
t = 157
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.133630
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 158
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.164612
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 159
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.009337
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.135201
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.135201
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.142588
Explore rate: 0.191114
Learning rate: 0.191114
Streaks: 31


Episode = 160
t = 163
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.119500
Explore rate: 0.191114
L


Episode = 161
t = 17
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.226706
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 18
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.233816
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.233816
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 20
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.240912
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.240912
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 22
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 96.029694
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.215280
Explore rate: 0.188425
Learning


Episode = 161
t = 79
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.282386
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.282386
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.289391
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 82
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.296383
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.296383
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.303361
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 85
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.236677
Explore rate: 0.188425
Learning

t = 140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.317733
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 141
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.324671
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.324671
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 143
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 96.212102
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 144
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 96.212102
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 145
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 96.212102
Explore rate: 0.188425
Learning rate: 0.188425
Streaks: 32


Episode = 161
t = 146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.338576
Explore rate: 0.188425
Learning rate: 0


Episode = 162
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.390571
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.415500
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.401860
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.419650
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.299299
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.362319
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.362319
Explore rate: 0.185752
Learning rate: 


Episode = 162
t = 62
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.344435
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.423798
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.423798
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.430441
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 66
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.385362
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 67
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.385362
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 68
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 96.423588
Explore rate: 0.185752
Learning


Episode = 162
t = 123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.512085
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 124
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.509912
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 125
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.523439
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 126
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 96.458965
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.529145
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.529145
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 129
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.535592
Explore rate: 0.185752
L



Episode = 162
t = 183
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.548986
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 184
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.576752
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 185
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.560502
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 186
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.560347
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 187
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.560347
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 188
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.566736
Explore rate: 0.185752
Learning rate: 0.185752
Streaks: 33


Episode = 162
t = 189
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.495219
Explore rate: 0.185752



Episode = 163
t = 43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.608386
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 44
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.647197
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 45
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.621631
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 46
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.648702
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 47
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.632724
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.638577
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.638577
Explore rate: 0.183096
Learning


Episode = 163
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.668694
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 106
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.660298
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 107
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.660298
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 108
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.616504
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.719330
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.719330
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.725337
Explore rate: 0.183096
L


Episode = 163
t = 166
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.666750
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 167
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.666750
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 168
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.762758
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.762758
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.768685
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.774602
Explore rate: 0.183096
Learning rate: 0.183096
Streaks: 34


Episode = 163
t = 172
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.748381
Explore rate: 0.183096
L


Episode = 164
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.804249
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 27
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.788165
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.836135
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 29
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.836135
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.841844
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 31
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 96.814404
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.811869
Explore rate: 0.180456
Learning


Episode = 164
t = 88
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.903938
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 89
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.768044
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.831214
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.831214
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.836932
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 93
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.842640
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.842640
Explore rate: 0.180456
Learning


Episode = 164
t = 148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.946305
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 149
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.930843
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 150
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.953570
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 151
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.940442
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.878280
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 153
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.878280
Explore rate: 0.180456
Learning rate: 0.180456
Streaks: 35


Episode = 164
t = 154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.883914
Explore rate: 0.180456
L


Episode = 165
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.984571
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.984571
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.989933
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.951929
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 14
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 96.992996
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 15
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 96.964580
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 96.949830
Explore rate: 0.177832
Learning


Episode = 165
t = 71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.007776
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 72
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.013097
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 73
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 96.975020
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.046482
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.046482
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.051734
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.023899
Explore rate: 0.177832
Learning


Episode = 165
t = 130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.076077
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 131
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.081276
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 132
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.068092
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.062205
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 134
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.951342
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.062205
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 136
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 96.976281
Explore rate: 0.177832
L


Episode = 165
t = 190
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.091967
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 191
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.134548
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 192
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.134548
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 193
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.134548
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 194
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 97.018205
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 195
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.120582
Explore rate: 0.177832
Learning rate: 0.177832
Streaks: 36


Episode = 165
t = 196
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.120582
Explore rate: 0.177832
L


Episode = 166
t = 50
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.152865
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 51
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.145889
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.173971
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 53
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.173971
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.178922
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 55
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.151409
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 56
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.183170
Explore rate: 0.175224
Learning


Episode = 166
t = 109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.201686
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 110
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.166677
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 111
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.204560
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 112
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.178214
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 113
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.204888
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 114
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 97.167362
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.218737
Explore rate: 0.175224
L


Episode = 166
t = 171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.276679
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 172
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.246157
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 173
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.280092
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 174
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.256869
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 175
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.280829
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 176
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.265832
Explore rate: 0.175224
Learning rate: 0.175224
Streaks: 37


Episode = 166
t = 177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.245236
Explore rate: 0.175224
L


Episode = 167
t = 30
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.305999
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.305999
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 32
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.280565
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.302632
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 34
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.225668
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.302632
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.302632
Explore rate: 0.172631
Learning


Episode = 167
t = 89
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.333491
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 90
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.333491
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 91
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.333491
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.338095
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 93
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.318761
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 94
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.343188
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 95
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 97.288339
Explore rate: 0.172631
Learning


Episode = 167
t = 148
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.385941
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 149
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.390454
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.390454
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 151
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.299208
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.394861
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.394861
Explore rate: 0.172631
Learning rate: 0.172631
Streaks: 38


Episode = 167
t = 154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.399358
Explore rate: 0.172631
L


Episode = 168
t = 10
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.421594
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 11
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.451333
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 12
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.430985
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.430985
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 14
Action: 1
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.430985
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.408109
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.408109
Explore rate: 0.170053
Learning


Episode = 168
t = 70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.477085
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 71
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.463796
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 72
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.482698
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 73
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.471291
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 74
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.485059
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 75
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.477909
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 76
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.488132
Explore rate: 0.170053
Learning


Episode = 168
t = 130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.498296
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 131
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.469263
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.522352
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 133
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.522352
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 134
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.526565
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 135
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.526565
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 136
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.530771
Explore rate: 0.170053
L


Episode = 168
t = 191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.545982
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.545982
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.550155
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 194
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.539388
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 195
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.555966
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 196
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.546363
Explore rate: 0.170053
Learning rate: 0.170053
Streaks: 39


Episode = 168
t = 197
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.511320
Explore rate: 0.170053
L



Episode = 169
t = 52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.603611
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 53
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.584418
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 54
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.607784
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 55
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.527944
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.566604
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.566604
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.570679
Explore rate: 0.167491
Learnin


Episode = 169
t = 113
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.606499
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 114
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.625047
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 115
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.613584
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 116
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.627124
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 117
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.619826
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.640713
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 119
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.640713
Explore rate: 0.167491
L

Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.661717
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 175
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.654582
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 176
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.667711
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 177
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.599520
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.645412
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 179
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.645412
Explore rate: 0.167491
Learning rate: 0.167491
Streaks: 40


Episode = 169
t = 180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best 


Episode = 170
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.707621
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.711402
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.680736
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.695441
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 37
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.695441
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.699243
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.687995
Explore rate: 0.164944
Learning


Episode = 170
t = 93
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.710263
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.739765
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 95
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.739765
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.743493
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 97
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.712028
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 98
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.745185
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 99
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.721216
Explore rate: 0.164944
Learning


Episode = 170
t = 153
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.766363
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 154
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.751263
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.765235
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.765235
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 157
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.768921
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 158
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.757302
Explore rate: 0.164944
Learning rate: 0.164944
Streaks: 41


Episode = 170
t = 159
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.773777
Explore rate: 0.164944
L


Episode = 171
t = 14
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.796419
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 15
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.787552
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 16
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.798572
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 17
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 97.757836
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.787424
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.787424
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.791017
Explore rate: 0.162412
Learning



Episode = 171
t = 74
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.777893
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.831527
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.831527
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 77
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.835048
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 78
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.817955
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 79
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.838761
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 80
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.824844
Explore rate: 0.162412
Learnin

Episode = 171
t = 133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.868851
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.868851
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.872313
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 136
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.875768
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.875768
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 138
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.879218
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.879218
Explore rate: 0.162412
Le


Episode = 171
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.893777
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 194
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 97.873671
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.893879
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.893879
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.897299
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 198
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.894994
Explore rate: 0.162412
Learning rate: 0.162412
Streaks: 42


Episode = 171
t = 199
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.903204
Explore rate: 0.162412
L


Episode = 172
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.932869
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.936174
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 57
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.922971
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 58
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.940156
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.929012
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 60
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.941686
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 97.913633
Explore rate: 0.159894
Learning

t = 114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.974689
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 115
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.929262
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.974689
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.974689
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 118
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.977927
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 119
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.939763
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 120
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 97.977927
Explore rate: 0.159894
Learning rate: 0


Episode = 172
t = 174
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.966539
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.003844
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.003844
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.007036
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.990777
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 179
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.010326
Explore rate: 0.159894
Learning rate: 0.159894
Streaks: 43


Episode = 172
t = 180
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 97.997084
Explore rate: 0.159894
L

Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 34
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.021473
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 35
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.030257
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 36
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.025956
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.025956
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 38
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 97.986881
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.017827
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.0


Episode = 173
t = 94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.037988
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 95
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.026938
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.056429
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 97
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.056429
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.059488
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 99
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.059759
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 100
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.065158
Explore rate: 0.157391
Learnin


Episode = 173
t = 156
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.081294
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 157
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.072287
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 158
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.072287
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.075321
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 160
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.062415
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.097031
Explore rate: 0.157391
Learning rate: 0.157391
Streaks: 44


Episode = 173
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.097031
Explore rate: 0.157391
L


Episode = 174
t = 15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.031467
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.110888
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.110888
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.113815
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 19
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.102510
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.125293
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.125293
Explore rate: 0.154902
Learning


Episode = 174
t = 76
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.135024
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 77
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.088598
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 78
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.088598
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 79
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.088598
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 80
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.088598
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 81
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.091559
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.139527
Explore rate: 0.154902
Learning


Episode = 174
t = 139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.176568
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.179392
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 141
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.182212
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.182212
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.185028
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 144
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.170724
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 45


Episode = 174
t = 145
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.188022
Explore rate: 0.154902
L


Episode = 175
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.192902
Explore rate: 0.154902
Learning rate: 0.154902
Streaks: 46


Episode = 175
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.177904
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.195696
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.195696
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.198447
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.185224
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.195766
Explore rate: 0.152427
Learning rate: 


Episode = 175
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.222037
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 61
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.165564
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.229011
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.229011
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.231710
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 65
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.219220
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 66
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.234806
Explore rate: 0.152427
Learning


Episode = 175
t = 122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.262929
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.262929
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.265577
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.249310
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 126
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.268007
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 127
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.254800
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.239820
Explore rate: 0.152427
L


Episode = 175
t = 181
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.215179
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.273974
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.273974
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.276605
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 185
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.255101
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 186
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.278213
Explore rate: 0.152427
Learning rate: 0.152427
Streaks: 46


Episode = 175
t = 187
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.261249
Explore rate: 0.152427
L


Episode = 176
t = 41
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.299969
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.299969
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 43
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.302519
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.302519
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 45
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.305064
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.305064
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 47
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.296794
Explore rate: 0.149967
Learning


Episode = 176
t = 102
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.336044
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 103
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.324177
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 104
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.336777
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 105
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.328561
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.305820
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.305820
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 108
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.287085
Explore rate: 0.149967
L


Episode = 176
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.330327
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.332831
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 164
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.323510
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 165
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.323510
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 166
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.319081
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.351708
Explore rate: 0.149967
Learning rate: 0.149967
Streaks: 47


Episode = 176
t = 168
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.326446
Explore rate: 0.149967
L


Episode = 177
t = 22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.361810
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.361810
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.364227
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.344028
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 26
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.365747
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 27
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.349643
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.357991
Explore rate: 0.147520
Learning


Episode = 177
t = 84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.387338
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.387338
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.389717
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 87
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.376285
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 88
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.398516
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 89
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.398516
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.400878
Explore rate: 0.147520
Learning


Episode = 177
t = 146
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.414419
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 147
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.405039
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 148
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.412130
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 149
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.412130
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.414473
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 151
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.416812
Explore rate: 0.147520
Learning rate: 0.147520
Streaks: 48


Episode = 177
t = 152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.416812
Explore rate: 0.147520
L


Episode = 178
t = 7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.446648
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.448901
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 9
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.451152
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.451152
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.435974
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.428974
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.428974
Explore rate: 0.145087
Learning ra


Episode = 178
t = 68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.465200
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 69
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.467426
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.467426
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 71
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.454766
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.457549
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.457549
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.459787
Explore rate: 0.145087
Learning

Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 130
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.496973
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 131
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.496973
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 132
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.478155
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 133
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.498315
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 134
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.498315
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.498315
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 136
Action: 1
State: (0, 0, 3, 0)


Episode = 178
t = 188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.509823
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 189
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.511985
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 190
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.494040
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 191
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.513412
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 192
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.499007
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 193
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.502233
Explore rate: 0.145087
Learning rate: 0.145087
Streaks: 49


Episode = 178
t = 194
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.502233
Explore rate: 0.145087
L


Episode = 179
t = 49
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.512965
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 50
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.507981
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 51
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.516201
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.516201
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 53
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.511271
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.531233
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.531233
Explore rate: 0.142668
Learning


Episode = 179
t = 109
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.552324
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.552324
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.554389
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 112
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.540433
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 113
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.532195
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.532195
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.534289
Explore rate: 0.142668
L


Episode = 179
t = 171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.564985
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 172
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.567032
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 173
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.562748
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 174
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.570224
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 175
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.506455
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.566297
Explore rate: 0.142668
Learning rate: 0.142668
Streaks: 50


Episode = 179
t = 177
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.517038
Explore rate: 0.142668
L


Episode = 180
t = 31
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.577454
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 32
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.588412
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 33
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.552727
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.585621
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.585621
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.587604
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 37
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.574785
Explore rate: 0.140261
Learning


Episode = 180
t = 93
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.598727
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 94
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.607416
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 95
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.600091
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 96
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.600091
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.602054
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 98
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.604015
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.604015
Explore rate: 0.140261
Learning


Episode = 180
t = 155
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.617343
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 156
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.622712
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.622712
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.624644
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 159
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.626573
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.626573
Explore rate: 0.140261
Learning rate: 0.140261
Streaks: 51


Episode = 180
t = 161
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.595439
Explore rate: 0.140261
L


Episode = 181
t = 17
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.637025
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.638904
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 19
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.635247
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 20
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.641899
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.641899
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 22
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.638037
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 23
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.644859
Explore rate: 0.137869
Learning


Episode = 181
t = 78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.661860
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 79
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.663705
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 80
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.615415
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.654371
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.654371
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.656226
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 84
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.649833
Explore rate: 0.137869
Learning


Episode = 181
t = 138
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.670796
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.670796
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 140
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.636395
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.671641
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 142
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.643086
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.671641
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.671641
Explore rate: 0.137869
L


Episode = 181
t = 198
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.700060
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52


Episode = 181
t = 199
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.694228
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 52

Episode 181 finished after 199.000000 time steps

Episode = 182
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.684449
Explore rate: 0.137869
Learning rate: 0.137869
Streaks: 53


Episode = 182
t = 1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.686263
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.686263
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.668585
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Bes

Best Q: 98.708340
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 58
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.696754
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 59
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.710049
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 60
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.681287
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.709243
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.709243
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.710992
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 64
Action: 1
State: (


Episode = 182
t = 120
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.719782
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 121
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.710168
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.719676
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.719676
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.721411
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.720330
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 126
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.695506
Explore rate: 0.135489
L


Episode = 182
t = 181
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.737675
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 182
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.739883
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.739883
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.741591
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 185
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.743296
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 186
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.737357
Explore rate: 0.135489
Learning rate: 0.135489
Streaks: 53


Episode = 182
t = 187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.739682
Explore rate: 0.135489
L


Episode = 183
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.754411
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 41
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.735154
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.760433
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.760433
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.762083
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.763731
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 46
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.756455
Explore rate: 0.133122
Learning


Episode = 183
t = 100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.775030
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 101
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.763088
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 102
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.776501
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 103
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.742085
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.769033
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.769033
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.770672
Explore rate: 0.133122
L


Episode = 183
t = 160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.789397
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.789397
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.791009
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 163
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.786677
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 164
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.793442
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 165
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.789184
Explore rate: 0.133122
Learning rate: 0.133122
Streaks: 54


Episode = 183
t = 166
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.786448
Explore rate: 0.133122
L


Episode = 184
t = 21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.804218
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.805781
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 23
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.801195
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 24
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.808107
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.803657
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.798290
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.798290
Explore rate: 0.130768
Learning


Episode = 184
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.812737
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.814290
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.823149
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.823149
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 85
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.780532
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.815840
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 87
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.815840
Explore rate: 0.130768
Learning

Streaks: 55


Episode = 184
t = 140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.835253
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 141
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.800241
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.834695
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.834695
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.836219
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 145
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.830314
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 146
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.838300
Explore rat

Best Q: 98.847223
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55


Episode = 184
t = 199
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.818378
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 55

Episode 184 finished after 199.000000 time steps

Episode = 185
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.844997
Explore rate: 0.130768
Learning rate: 0.130768
Streaks: 56


Episode = 185
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.836561
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.846724
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.839347
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.847267
Explore rate: 0.128427
Learning rate: 0.128427
Streaks:


Episode = 185
t = 59
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.859380
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 60
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.860845
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 61
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.825452
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.870169
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 63
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.870169
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.871620
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 65
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.863484
Explore rate: 0.128427
Learning


Episode = 185
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.871964
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 121
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.866220
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.869199
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 123
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.869199
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 124
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.870652
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 125
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.872925
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.873945
Explore rate: 0.128427
L


Episode = 185
t = 180
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.875860
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 181
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.875860
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.886591
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.886591
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.888021
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 185
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.889449
Explore rate: 0.128427
Learning rate: 0.128427
Streaks: 56


Episode = 185
t = 186
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.886994
Explore rate: 0.128427
L


Episode = 186
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.900099
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.900099
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.901486
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.892350
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.897613
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.897613
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.899003
Explore rate: 0.126098
Learning


Episode = 186
t = 99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.909439
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 100
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.909439
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 101
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.910814
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.912188
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 103
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.906497
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 104
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.914048
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 105
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.908818
Explore rate: 0.126098
Le


Episode = 186
t = 161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.922389
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.922389
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 163
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.923748
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 164
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.916479
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 165
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.925384
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 166
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.918957
Explore rate: 0.126098
Learning rate: 0.126098
Streaks: 57


Episode = 186
t = 167
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.927463
Explore rate: 0.126098
L


Episode = 187
t = 19
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.909109
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 20
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.909109
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 21
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.909109
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 22
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.909109
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 23
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.910459
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.937653
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 25
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.937653
Explore rate: 0.123782
Learning

Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 79
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.931268
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 80
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.931268
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 81
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.932591
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.959873
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 83
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.938413
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.959873
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 85
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 98.9


Episode = 187
t = 137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.956259
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.956259
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.957551
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 140
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.958841
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.958841
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 142
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 98.952214
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 143
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.960447
Explore rate: 0.123782
L


Episode = 187
t = 198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.975490
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58


Episode = 187
t = 199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.975490
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 58

Episode 187 finished after 199.000000 time steps

Episode = 188
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.979464
Explore rate: 0.123782
Learning rate: 0.123782
Streaks: 59


Episode = 188
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.972387
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.980963
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.974667
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Bes


Episode = 188
t = 58
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.992297
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 98.989090
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 60
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 98.993135
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 61
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.943582
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.988862
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 63
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.988862
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 98.990091
Explore rate: 0.121478
Learning


Episode = 188
t = 119
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.007417
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.007417
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.008623
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 122
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.003159
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 123
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.010228
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.010228
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.005220
Explore rate: 0.121478
L


Episode = 188
t = 180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.015830
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 181
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.010717
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 182
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.017461
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 183
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 98.969844
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.011402
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 185
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.011402
Explore rate: 0.121478
Learning rate: 0.121478
Streaks: 59


Episode = 188
t = 186
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.012603
Explore rate: 0.121478
L


Episode = 189
t = 38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.024230
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.016200
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 40
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.025470
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 41
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.004344
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.024783
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.024783
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.025945
Explore rate: 0.119186
Learning


Episode = 189
t = 97
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.025156
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 98
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.035068
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 99
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.019012
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.038215
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.038215
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.039362
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 103
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.028513
Explore rate: 0.119186
Lear

Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 156
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.044428
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.044428
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.045567
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.047744
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 160
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.047744
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 161
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.044933
Explore rate: 0.119186
Learning rate: 0.119186
Streaks: 60


Episode = 189
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best 


Episode = 190
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.055983
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.055983
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.057087
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 21
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.058189
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.058189
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 23
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.054033
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 24
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.059781
Explore rate: 0.116907
Learning


Episode = 190
t = 77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.070906
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.070906
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 79
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.071992
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 80
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.017300
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.069738
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.069738
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 83
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.070825
Explore rate: 0.116907
Learning


Episode = 190
t = 137
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.074789
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.080571
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.080571
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.081646
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.076545
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.076545
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 143
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.076539
Explore rate: 0.116907
L


Episode = 190
t = 197
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.089708
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 198
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.082390
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61


Episode = 190
t = 199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.079949
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 61

Episode 190 finished after 199.000000 time steps

Episode = 191
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.089925
Explore rate: 0.116907
Learning rate: 0.116907
Streaks: 62


Episode = 191
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.083180
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.091145
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
B


Episode = 191
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.100179
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.101210
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.094423
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 62
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.102383
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 63
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.069568
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.093158
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.093158
Explore rate: 0.114639
Learning


Episode = 191
t = 118
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.094495
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 119
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.104448
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 120
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.078340
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 121
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.103656
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.103656
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 123
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.104684
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 124
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.104851
Explore rate: 0.114639
L


Episode = 191
t = 178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.111299
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 179
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.118527
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 180
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.087252
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.112651
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.112651
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.113668
Explore rate: 0.114639
Learning rate: 0.114639
Streaks: 62


Episode = 191
t = 184
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.106760
Explore rate: 0.114639
L


Episode = 192
t = 40
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.119009
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.125753
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.125753
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.126735
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 44
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.095922
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.124233
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.124233
Explore rate: 0.112383
Learning


Episode = 192
t = 100
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.136019
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 101
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.136019
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.136990
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 103
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.130237
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 104
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.132355
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 105
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.132355
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.133330
Explore rate: 0.112383
L


Episode = 192
t = 161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.138930
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.138930
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 163
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.139898
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 164
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.134127
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.150955
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 166
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.150955
Explore rate: 0.112383
Learning rate: 0.112383
Streaks: 63


Episode = 192
t = 167
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.151909
Explore rate: 0.112383
L


Episode = 193
t = 21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.145293
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 22
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.151421
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 23
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.146902
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 24
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.151863
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.148383
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 26
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.152418
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 27
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.149761
Explore rate: 0.110138
Learning


Episode = 193
t = 82
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.157276
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.163229
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.163229
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.164150
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 86
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.165071
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 87
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.160867
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.160867
Explore rate: 0.110138
Learning


Episode = 193
t = 142
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.162756
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.166512
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.166512
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 145
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.167430
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 146
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.164779
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 147
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.168874
Explore rate: 0.110138
Learning rate: 0.110138
Streaks: 64


Episode = 193
t = 148
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.143348
Explore rate: 0.110138
L


Episode = 194
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.177219
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.177219
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.178107
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.175006
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.179454
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.155566
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.179106
Explore rate: 0.107905
Learning rate: 


Episode = 194
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.185753
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 63
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.185753
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.186631
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 65
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.187509
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.187509
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 67
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.165637
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.186587
Explore rate: 0.107905
Learning


Episode = 194
t = 123
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.193230
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 124
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.194100
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 125
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.189792
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 126
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.195285
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 127
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.191916
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.191916
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 129
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.172306
Explore rate: 0.107905
L


Episode = 194
t = 183
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.196791
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 184
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.203969
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 185
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.198425
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.198718
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.198718
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.199583
Explore rate: 0.107905
Learning rate: 0.107905
Streaks: 65


Episode = 194
t = 189
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.192926
Explore rate: 0.107905
L


Episode = 195
t = 44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.210777
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.210777
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.211611
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.204177
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.211854
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 49
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.211854
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.212687
Explore rate: 0.105684
Learning

State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.220834
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 104
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.221657
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 105
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.216996
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 106
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.222728
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 107
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.218424
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 108
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.223099
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 109
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.219739
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 6


Episode = 195
t = 163
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.232031
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 164
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.199553
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.222624
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 166
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.222624
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 167
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.223446
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 168
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.224266
Explore rate: 0.105684
Learning rate: 0.105684
Streaks: 66


Episode = 195
t = 169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.224266
Explore rate: 0.105684
L


Episode = 196
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.234135
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.231619
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.232504
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.232504
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.233299
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.228173
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.235380
Explore rate: 0.103474
Learning

Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 85
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.242381
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 86
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.247060
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 87
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.214838
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.239316
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.239316
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.240103
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 91
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.2


Episode = 196
t = 145
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.246944
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 146
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.243103
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 147
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.248028
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 148
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.244390
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 149
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.255850
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.255850
Explore rate: 0.103474
Learning rate: 0.103474
Streaks: 67


Episode = 196
t = 151
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.249592
Explore rate: 0.103474
L


Episode = 197
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.254016
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.254016
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.254772
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.250990
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.263269
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.263269
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.264015
Explore rate: 0.101275
Learning rat


Episode = 197
t = 66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.261969
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 67
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.261969
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.262716
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 69
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.259788
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 70
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.263841
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 71
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.260944
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 72
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.264296
Explore rate: 0.101275
Learning


Episode = 197
t = 127
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.247405
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.270725
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 129
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.270725
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.271464
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 131
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.264886
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.271008
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.271008
Explore rate: 0.101275
L

Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.280494
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 189
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.275305
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 190
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.277662
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.277662
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.278394
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 193
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.272779
Explore rate: 0.101275
Learning rate: 0.101275
Streaks: 68


Episode = 197
t = 194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best 


Episode = 198
t = 49
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.260659
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.284224
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 51
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.284224
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.284940
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 53
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.285655
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.285655
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 55
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.282196
Explore rate: 0.099087
Learning


Episode = 198
t = 110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.289459
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 111
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.285703
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 112
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.290438
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 113
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.272767
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.294097
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 115
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.294097
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.294803
Explore rate: 0.099087
L


Episode = 198
t = 170
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.292768
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 171
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.297043
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 172
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.293898
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 173
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.297435
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 174
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.297435
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.297435
Explore rate: 0.099087
Learning rate: 0.100000
Streaks: 69


Episode = 198
t = 176
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.294954
Explore rate: 0.099087
L


Episode = 199
t = 30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.304505
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.304505
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.305200
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 33
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.301607
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.305008
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.305008
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.305703
Explore rate: 0.096910
Learning


Episode = 199
t = 91
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.311281
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.312079
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.312079
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 94
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.312767
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 95
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.312767
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 96
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.313454
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 97
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.294116
Explore rate: 0.096910
Learning

Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.322284
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.322284
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.322961
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 152
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.317517
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.318794
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.318794
Explore rate: 0.096910
Learning rate: 0.100000
Streaks: 70


Episode = 199
t = 155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.319475
Explore rate: 0.096910
Learning rate: 0.100000


Best Q: 99.322910
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.323982
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.323982
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.324658
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 11
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.320235
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.326946
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 13
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.326946
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 14
Action: 1
State: (0,


Episode = 200
t = 66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.332379
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 67
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.329231
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.335446
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 69
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.335446
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.336110
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 71
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.329475
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.333336
Explore rate: 0.094744
Learning


Episode = 200
t = 128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.341678
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 129
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.338900
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 130
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.342654
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 131
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.339933
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.339878
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.339878
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.340538
Explore rate: 0.094744
L

Reward: 1.000000
Best Q: 99.348092
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 188
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.343664
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 189
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.342818
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.342818
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.343475
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 192
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.340170
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t = 193
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.344395
Explore rate: 0.094744
Learning rate: 0.100000
Streaks: 71


Episode = 200
t 


Episode = 201
t = 44
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.354330
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.354330
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.354976
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.347446
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.345639
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 49
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.345639
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.346293
Explore rate: 0.092589
Learning


Episode = 201
t = 101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.353136
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.353783
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.354429
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 104
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.355074
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 105
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.355074
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 106
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.351199
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.357715
Explore rate: 0.092589
L


Episode = 201
t = 161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.364950
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.365586
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 163
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.358475
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.360469
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 165
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.336867
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.360469
Explore rate: 0.092589
Learning rate: 0.100000
Streaks: 72


Episode = 201
t = 167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.360469
Explore rate: 0.092589
L


Episode = 202
t = 19
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.366080
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 20
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.372105
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 21
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.339867
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 22
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.339867
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 23
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.339867
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.368085
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 25
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.343915
Explore rate: 0.090444
Learning


Episode = 202
t = 78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.375018
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 79
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.370628
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 80
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.373299
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 81
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.373299
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.373926
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 83
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.371094
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 84
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.374835
Explore rate: 0.090444
Learning



Episode = 202
t = 144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.379743
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 145
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.379743
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.380364
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 147
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.377165
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 148
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.381224
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 149
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.378190
Explore rate: 0.090444
Learning rate: 0.100000
Streaks: 73


Episode = 202
t = 150
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.381543
Explore rate: 0.090444


State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.385230
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.385844
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.373014
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.386970
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.386970
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.387584
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 11
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.382892
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episod


Episode = 203
t = 65
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.386669
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 66
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.386669
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 67
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.380556
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.392125
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 69
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.392125
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.392732
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 71
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.393340
Explore rate: 0.088310
Learning


Episode = 203
t = 126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.399602
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 127
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.394511
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 128
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.384340
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 129
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.384340
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.395936
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 131
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.395936
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.396540
Explore rate: 0.088310
L


Episode = 203
t = 186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.404527
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.405123
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 188
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.401139
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 189
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.405858
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 190
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.402205
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.404853
Explore rate: 0.088310
Learning rate: 0.100000
Streaks: 74


Episode = 203
t = 192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.404853
Explore rate: 0.088310
L


Episode = 204
t = 47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.405906
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.410308
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 49
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.410308
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.410898
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 51
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.408354
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 52
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.411765
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 53
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.397309
Explore rate: 0.086186
Learning


Episode = 204
t = 106
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.417326
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.417386
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.417386
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 109
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.414450
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.417326
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.417326
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.417909
Explore rate: 0.086186
L


Episode = 204
t = 166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.423084
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 167
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.419937
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 168
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.423869
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.423869
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 170
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.410214
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.420827
Explore rate: 0.086186
Learning rate: 0.100000
Streaks: 75


Episode = 204
t = 172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.420827
Explore rate: 0.086186
L


Episode = 205
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.424273
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 26
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.429653
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 27
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.413310
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.426520
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.426520
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.427093
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 31
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.422655
Explore rate: 0.084073
Learning


Episode = 205
t = 84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.432643
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 85
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.420727
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.432941
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 87
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.432941
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 88
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.433508
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.433508
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.434075
Explore rate: 0.084073
Learning


Episode = 205
t = 145
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.437970
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 146
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.425745
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 147
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.437116
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.437116
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.437679
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 150
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.434986
Explore rate: 0.084073
Learning rate: 0.100000
Streaks: 76


Episode = 205
t = 151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.437322
Explore rate: 0.084073
L


Episode = 206
t = 5
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.442658
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.443215
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.438543
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.443810
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.423393
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.442661
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.442661
Explore rate: 0.081970
Learning rate

t = 64
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.443486
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 65
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.449273
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 66
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.435424
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 67
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.450327
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.450327
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 69
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.450877
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 70
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.447033
Explore rate: 0.081970
Learning rate: 0.100000


Episode = 206
t = 125
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.451470
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 126
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.455108
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 127
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.452379
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.453265
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 129
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.453265
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.453811
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 131
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.441271
Explore rate: 0.081970
L



Episode = 206
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.459652
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.460193
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 184
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.457661
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 185
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.460968
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 186
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.439096
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.457737
Explore rate: 0.081970
Learning rate: 0.100000
Streaks: 77


Episode = 206
t = 188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.457737
Explore rate: 0.081970



Episode = 207
t = 41
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.453129
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.461779
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.461779
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.462317
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 45
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.462855
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.462855
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.461732
Explore rate: 0.079877
Learning


Episode = 207
t = 102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.470561
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 103
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.466628
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 104
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.471177
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 105
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.467611
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.467285
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.467285
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.467817
Explore rate: 0.079877
L


Episode = 207
t = 163
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.473014
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.473014
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 165
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.465238
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 166
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.465238
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 167
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.472507
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 168
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.472507
Explore rate: 0.079877
Learning rate: 0.100000
Streaks: 78


Episode = 207
t = 169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.473035
Explore rate: 0.079877
L


Episode = 208
t = 22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.479082
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 23
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.479082
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.479603
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.474542
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.477277
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.477277
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.477800
Explore rate: 0.077794
Learning

Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.482429
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 83
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.480941
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 84
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.483265
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 85
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.481690
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.484814
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 87
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.484814
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 88
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.4


Episode = 208
t = 141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.488543
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.488543
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.489055
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 144
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.488669
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 145
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.487808
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.487808
Explore rate: 0.077794
Learning rate: 0.100000
Streaks: 79


Episode = 208
t = 147
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.488320
Explore rate: 0.077794
L


Episode = 209
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.488036
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.492005
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.492005
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.492513
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.475984
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.493068
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.493068
Explore rate: 0.075721
Learning rate: 


Episode = 209
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.496047
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 63
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.493683
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.498261
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.498261
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.498763
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 67
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.495342
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 68
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.499377
Explore rate: 0.075721
Learning


Episode = 209
t = 123
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.503056
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 124
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.488832
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 125
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.500948
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.500948
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.501447
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 128
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.499624
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 129
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.502213
Explore rate: 0.075721
L


Episode = 209
t = 184
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.503133
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 185
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.508140
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 186
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.504125
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 187
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.508235
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 188
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.494937
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 189
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.505853
Explore rate: 0.075721
Learning rate: 0.100000
Streaks: 80


Episode = 209
t = 190
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.505853
Explore rate: 0.075721
L

Streaks: 81


Episode = 210
t = 46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.512031
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.512031
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.512519
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 49
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.500228
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.513197
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 51
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.513197
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.513684
Explore rate: 0.07


Episode = 210
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.517737
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.517737
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.518219
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 108
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.516599
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 109
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.518974
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 110
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.517318
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.517383
Explore rate: 0.073658
L


Episode = 210
t = 165
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.515337
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 166
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.521026
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 167
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.509258
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 168
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.509258
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 169
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.509258
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.524063
Explore rate: 0.073658
Learning rate: 0.100000
Streaks: 81


Episode = 210
t = 171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.524063
Explore rate: 0.073658
L


Episode = 211
t = 26
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.510138
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 27
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.510138
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.523137
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 29
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.523137
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.523613
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 31
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.521478
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 32
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.524307
Explore rate: 0.071604
Learning


Episode = 211
t = 87
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.529949
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 88
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.530915
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 89
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.530915
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.531385
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 91
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.528935
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.532886
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.532886
Explore rate: 0.071604
Learning


Episode = 211
t = 147
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.535353
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.535817
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 149
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.535348
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.535277
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.535277
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.535741
Explore rate: 0.071604
Learning rate: 0.100000
Streaks: 82


Episode = 211
t = 153
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.521657
Explore rate: 0.071604
L


Episode = 212
t = 7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.539193
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.542372
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.527762
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.538988
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.538988
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.539449
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.536796
Explore rate: 0.069560
Learning ra


Episode = 212
t = 67
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.547331
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.547784
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 69
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.543173
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 70
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.548186
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 71
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.544126
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 72
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.548236
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 73
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.544989
Explore rate: 0.069560
Learning

Streaks: 83


Episode = 212
t = 128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.549553
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 129
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.547275
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 130
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.550184
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 131
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.533153
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.551306
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 133
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.551306
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 134
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.551755
Explore rat


Episode = 212
t = 188
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.539245
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 189
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.552256
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 190
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.540994
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.552256
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.552256
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.552704
Explore rate: 0.069560
Learning rate: 0.100000
Streaks: 83


Episode = 212
t = 194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.553152
Explore rate: 0.069560
L

Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.558545
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 50
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.541881
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 51
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.557667
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.557667
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.558109
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.558551
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 55
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.555710
Explore rate: 0.067526
Learning rate: 0.100000
Streak


Episode = 213
t = 109
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.559967
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.562272
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.562272
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.562710
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 113
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.551074
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.562254
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.562254
Explore rate: 0.067526
L


Episode = 213
t = 169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.565511
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 170
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.562384
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 171
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.566027
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 172
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.566027
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.566027
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.566461
Explore rate: 0.067526
Learning rate: 0.100000
Streaks: 84


Episode = 213
t = 175
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.563183
Explore rate: 0.067526
L

t = 29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.568312
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 30
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.570934
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 31
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.559097
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.570533
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.570533
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.570962
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.568751
Explore rate: 0.065502
Learning rate: 0.100000

Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.574339
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 90
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.574765
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.574765
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.575190
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 93
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.571674
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.574762
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 95
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.574762
Explore rate: 0.065502
Learning rate: 0.100000
Streak


Episode = 214
t = 151
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.579996
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.580417
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 153
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.578092
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 154
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.580984
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 155
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.578800
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 156
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.580012
Explore rate: 0.065502
Learning rate: 0.100000
Streaks: 85


Episode = 214
t = 157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.580012
Explore rate: 0.065502
L


Episode = 215
t = 13
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.581534
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.581953
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 15
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.581472
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.585815
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.585815
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.586229
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 19
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.572407
Explore rate: 0.063486
Learning

Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.573002
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.588285
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.588285
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.588697
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.589108
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.584626
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 78
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.589445
Explore rate: 0.063486
Learning rate: 0.100000
Streak

Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.588815
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 132
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.590949
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 133
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.589437
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 134
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.591208
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.591208
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 136
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.578570
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 137
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.578570
Explore rate: 0.063486
Learning rate: 0.100000



Episode = 215
t = 190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.595166
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 191
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.583594
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.595166
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.595166
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.595571
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 195
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.595975
Explore rate: 0.063486
Learning rate: 0.100000
Streaks: 86


Episode = 215
t = 196
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.594037
Explore rate: 0.063486
L


Episode = 216
t = 50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.598728
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 51
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.587223
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.599501
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 53
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.599501
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.599901
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.600301
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 56
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.598960
Explore rate: 0.061480
Learning


Episode = 216
t = 108
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.598627
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 109
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.601430
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.601430
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 111
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.599306
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 112
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.601977
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 113
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.599971
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.602887
Explore rate: 0.061480
L



Episode = 216
t = 167
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.605747
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 168
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.596571
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.606711
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.606711
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.607104
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 172
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.604383
Explore rate: 0.061480
Learning rate: 0.100000
Streaks: 87


Episode = 216
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.605233
Explore rate: 0.061480



Episode = 217
t = 28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.609736
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.610127
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.610517
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 31
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.607075
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.612681
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.612681
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.613068
Explore rate: 0.059484
Learning


Episode = 217
t = 88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.613182
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.613569
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 90
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.611211
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 91
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.614070
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.614070
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 93
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.611883
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.614867
Explore rate: 0.059484
Learning

Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 149
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.615728
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.617959
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 151
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.617959
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.618341
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 153
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.606253
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.617623
Explore rate: 0.059484
Learning rate: 0.100000
Streaks: 88


Episode = 217
t = 155
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best 

Streaks: 89


Episode = 218
t = 9
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.619645
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.621319
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.621319
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.621697
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.619125
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 14
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.609387
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 15
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.609387
Explore rate: 0.057


Episode = 218
t = 68
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.624518
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 69
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.625780
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 70
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.625780
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.625780
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 72
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.612771
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.622997
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.622997
Explore rate: 0.057496
Learning


Episode = 218
t = 126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.629087
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 127
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.627067
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 128
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.629592
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 129
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.627690
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.627605
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 131
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.627605
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.627977
Explore rate: 0.057496
L


Episode = 218
t = 184
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.620485
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 185
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.631356
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.631356
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.631725
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 188
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.629804
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 189
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.632057
Explore rate: 0.057496
Learning rate: 0.100000
Streaks: 89


Episode = 218
t = 190
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.632057
Explore rate: 0.057496
L

Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.634245
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.636916
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.636916
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.637279
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 47
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.620636
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.636593
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.6


Episode = 219
t = 103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.639111
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.639111
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.639472
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 106
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.638915
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 107
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.640102
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 108
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.639394
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.636479
Explore rate: 0.055517
L


Episode = 219
t = 163
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.639529
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.639890
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.640250
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 166
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.638558
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 167
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.640766
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 168
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.631812
Explore rate: 0.055517
Learning rate: 0.100000
Streaks: 90


Episode = 219
t = 169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.643935
Explore rate: 0.055517
L

Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.641731
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 22
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.643473
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 23
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.636206
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 24
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.636206
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 25
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.636206
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.645333
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 27
Action: 0
State: (0, 0, 2, 1)
Reward


Episode = 220
t = 79
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.639097
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.650109
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.650109
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.650459
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.650809
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 84
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.648889
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 85
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.651282
Explore rate: 0.053548
Learning

t = 138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.652089
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.652089
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.652437
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 141
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.651111
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 142
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.652966
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 143
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.641266
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91


Episode = 220
t = 144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.650388
Explore rate: 0.053548
Learning rate: 0

Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 91

Episode 220 finished after 199.000000 time steps

Episode = 221
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.655296
Explore rate: 0.053548
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.652955
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.654992
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.654992
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.655337
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.653414
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221



Episode = 221
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.659128
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.657054
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 60
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.659214
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.659214
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.659555
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 63
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.657294
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.659570
Explore rate: 0.051587
Learning

Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.660085
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.661459
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 119
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.661459
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 120
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.661798
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 121
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.661279
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 122
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.662389
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 123
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.652419
Explore rate: 0.051587
Learning rate: 0.100000



Episode = 221
t = 177
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.666786
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.665349
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.663956
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.663956
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.664292
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 182
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.661692
Explore rate: 0.051587
Learning rate: 0.100000
Streaks: 92


Episode = 221
t = 183
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.664672
Explore rate: 0.051587
L

Reward: 1.000000
Best Q: 99.671100
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 37
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.671429
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 38
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.668293
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 39
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.671742
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 40
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.656410
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.667276
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.667276
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 43
A


Episode = 222
t = 97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.673508
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.673835
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 99
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.670572
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 100
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.674131
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 101
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.659259
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.672542
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.672542
Explore rate: 0.049635
Lear

Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.675791
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 160
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.676115
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.676439
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 162
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.674347
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 163
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.674347
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 164
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.662519
Explore rate: 0.049635
Learning rate: 0.100000
Streaks: 93


Episode = 222
t = 165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best 


Episode = 223
t = 18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.678916
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.678916
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.679237
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.677169
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.678228
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 23
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.666047
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.678228
Explore rate: 0.047692
Learning


Episode = 223
t = 77
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.682902
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.683219
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 79
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.681520
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 80
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.683652
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 81
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.682050
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.682167
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.682167
Explore rate: 0.047692
Learning


Episode = 223
t = 138
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.685584
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 139
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.673723
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.683656
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.683656
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.683972
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 143
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.684307
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 144
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.684606
Explore rate: 0.047692
L


Episode = 223
t = 198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.688324
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94


Episode = 223
t = 199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.688324
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 94

Episode 223 finished after 199.000000 time steps

Episode = 224
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.688636
Explore rate: 0.047692
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.687679
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.689132
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.688136
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Bes


Episode = 224
t = 59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.689340
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.692982
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 61
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.692982
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.693289
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 63
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.691453
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 64
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.693690
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 65
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.678348
Explore rate: 0.045757
Learning


Episode = 224
t = 120
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.694674
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.694134
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.694134
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.694440
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 124
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.692979
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 125
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.694876
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 126
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.693474
Explore rate: 0.045757
L


Episode = 224
t = 179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.697080
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.697080
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.697383
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 182
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.696661
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 183
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.697887
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 184
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.684222
Explore rate: 0.045757
Learning rate: 0.100000
Streaks: 95


Episode = 224
t = 185
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.698695
Explore rate: 0.045757
L


Episode = 225
t = 39
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.699553
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 40
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.700682
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 41
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.689218
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.701044
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.701044
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.701343
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 45
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.698791
Explore rate: 0.043832
Learning


Episode = 225
t = 100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.703188
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 101
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.702756
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 102
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.703709
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 103
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.693506
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.703942
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.703942
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.704238
Explore rate: 0.043832
L


Episode = 225
t = 161
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.706080
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.704597
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.704597
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.704892
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 165
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.690856
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.706957
Explore rate: 0.043832
Learning rate: 0.100000
Streaks: 96


Episode = 225
t = 167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.706957
Explore rate: 0.043832
L


Episode = 226
t = 19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.707745
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.708037
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.707086
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.710188
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 23
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.710188
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.710477
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.708623
Explore rate: 0.041914
Learning



Episode = 226
t = 79
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.710915
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.711204
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 81
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.710476
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.712721
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.712721
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.713008
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 85
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.711562
Explore rate: 0.041914
Learnin


Episode = 226
t = 140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.715582
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.715582
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.715867
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 143
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.714290
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 144
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.716250
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 145
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.714769
Explore rate: 0.041914
Learning rate: 0.100000
Streaks: 97


Episode = 226
t = 146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.713332
Explore rate: 0.041914
L


Episode = 227
t = 1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.715380
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.715312
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.715658
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.715631
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.715939
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.718771
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.718771
Explore rate: 0.040005
Learning rate: 


Episode = 227
t = 68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.720275
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 69
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.718063
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.719069
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 71
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.708924
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.719069
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.719069
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.719350
Explore rate: 0.040005
Learning


Episode = 227
t = 127
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.722787
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.722787
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 129
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.723065
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.723342
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 131
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.721196
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 132
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.723655
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 133
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.712704
Explore rate: 0.040005
L


Episode = 227
t = 189
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.723234
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 190
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.724034
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 191
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.723590
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.725585
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.725585
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 194
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.724248
Explore rate: 0.040005
Learning rate: 0.100000
Streaks: 98


Episode = 227
t = 195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.724266
Explore rate: 0.040005
L


Episode = 228
t = 47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.725596
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.727241
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 49
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.727241
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.727513
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 51
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.726703
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 52
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.727951
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 53
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.727100
Explore rate: 0.038105
Learning

Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.731057
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.731326
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 110
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.729649
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 111
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.731670
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 112
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.720258
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 113
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.728900
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best 


Episode = 228
t = 169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.731857
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.732125
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 171
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.730718
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 172
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.733187
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 173
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.733187
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.733454
Explore rate: 0.038105
Learning rate: 0.100000
Streaks: 99


Episode = 228
t = 175
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.731834
Explore rate: 0.038105
L


Episode = 229
t = 29
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.727787
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.734945
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.734945
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.735210
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 33
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.734242
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 34
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.734242
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 35
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.727214
Explore rate: 0.036212
Le

State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.736288
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.737799
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.737799
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.738061
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 93
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.729147
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.737379
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 95
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.737379
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 1


Episode = 229
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.739263
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 151
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.737607
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.740363
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.740363
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.740623
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 155
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.739189
Explore rate: 0.036212
Learning rate: 0.100000
Streaks: 100


Episode = 229
t = 156
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.740974
Explore rate: 0.03

Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.742452
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.742452
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.742709
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.741833
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 14
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.743111
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 15
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.736104
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 16
Action: 1
State: (0, 0, 3, 1)


Episode = 230
t = 69
Action: 0
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.737239
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.745315
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 71
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.745315
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.745569
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 73
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.743587
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 74
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.745856
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 75
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.738772
Explore rate: 0.034328
Le

Streaks: 101


Episode = 230
t = 129
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.747223
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.747476
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 131
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.741484
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.748009
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.748009
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.748261
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 135
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.746869
Expl


Episode = 230
t = 190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.750241
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 191
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.749003
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 192
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.750593
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 193
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.749411
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 194
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.750103
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.750103
Explore rate: 0.034328
Learning rate: 0.100000
Streaks: 101


Episode = 230
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.750353
Explore rate: 0.03


Episode = 231
t = 52
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.754103
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 53
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.752878
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.753051
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.753051
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.753298
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 57
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.743023
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.754228
Explore rate: 0.032452
Le


Episode = 231
t = 112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.755198
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 113
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.755443
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.755688
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 115
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.754479
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 116
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.756032
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 117
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.745688
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.755302
Explore rate: 0.03

Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.756906
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.757149
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 175
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.756067
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.758520
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.758520
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.758762
Explore rate: 0.032452
Learning rate: 0.100000
Streaks: 102


Episode = 231
t = 179
Action: 1
State: (0, 0, 2, 0)
Reward: 1.00000


Episode = 232
t = 31
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.757797
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.761023
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.761023
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.761262
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.759420
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 36
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.761533
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.759869
Explore rate: 0.030584
Le

Streaks: 103


Episode = 232
t = 92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.763172
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 93
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.763408
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 94
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.753100
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 95
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.761608
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 96
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.761608
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.761846
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.762084
Explore rat

Episode = 232
t = 151
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.763512
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.764760
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.764760
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.764995
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 155
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.758492
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 156
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.765191
Explore rate: 0.030584
Learning rate: 0.100000
Streaks: 103


Episode = 232
t = 157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.765191
Explore rate: 0.030


Episode = 233
t = 13
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.767932
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.768164
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 15
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.767303
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 16
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.768519
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.767656
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.767258
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.767258
Explore rate: 0.028724
Le


Episode = 233
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.769495
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.769725
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.769955
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.768287
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 78
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.770227
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 79
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.762330
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 80
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.769336
Explore rate: 0.028724
Le


Episode = 233
t = 135
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.765071
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.771631
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.771631
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.771859
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.772087
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 140
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.771480
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 141
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.772460
Explore rate: 0.02


Episode = 233
t = 195
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.771789
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 196
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.773483
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 197
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.773483
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.773709
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104


Episode = 233
t = 199
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.773511
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 104

Episode 233 finished after 199.000000 time steps

Episode = 234
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.774120
Explore rate: 0.028724
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 1
Action: 1
State: (0, 0, 2, 0)
Reward: 


Episode = 234
t = 55
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.766996
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.776617
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.776617
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.776840
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.774940
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 60
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.777076
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.775376
Explore rate: 0.026872
Le


Episode = 234
t = 116
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.770113
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 117
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.770113
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 118
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.770113
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 119
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.770343
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.779068
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.779068
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.779289
Explore rate: 0.02


Episode = 234
t = 177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.780798
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.781018
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.781237
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 180
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.780172
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.779971
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.779971
Explore rate: 0.026872
Learning rate: 0.100000
Streaks: 105


Episode = 234
t = 183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.780191
Explore rate: 0.02


Episode = 235
t = 39
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.781347
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.783040
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.783040
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.783257
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 43
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.782039
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.783143
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.783143
Explore rate: 0.025028
Le


Episode = 235
t = 97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.784998
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.785213
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 99
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.784170
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.784975
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.784975
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.785190
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 103
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.777146
Explore rate: 0.02502


Episode = 235
t = 157
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.787323
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.786690
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.786690
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.786903
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 161
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.779252
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.788771
Explore rate: 0.025028
Learning rate: 0.100000
Streaks: 106


Episode = 235
t = 163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.788771
Explore rate: 0.02


Episode = 236
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.788437
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.787848
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 18
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.788780
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 19
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.781444
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.788551
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.788551
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.788763
Explore rate: 0.023192
Le

Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.790417
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 77
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.789448
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 78
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.790720
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 79
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.784203
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.790891
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.790891
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 82
Action: 0
State: (0, 0, 2, 1)


Episode = 236
t = 136
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.793842
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 137
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.784448
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.792638
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.792638
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.792846
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 141
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.791933
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 142
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.793149
Explore rate: 0.02


Episode = 236
t = 195
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.794067
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 196
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.794633
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 197
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.794633
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.794839
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107


Episode = 236
t = 199
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.794157
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 107

Episode 236 finished after 199.000000 time steps

Episode = 237
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.795161
Explore rate: 0.023192
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 


Episode = 237
t = 54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.796990
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 55
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.791351
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.798441
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.798441
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.798643
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.797057
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 60
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.798868
Explore rate: 0.021363
Le


Episode = 237
t = 114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.799818
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 115
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.798920
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 116
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.800110
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 117
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.799239
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.798733
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 119
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.798733
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.798934
Explore rate: 0.02


Episode = 237
t = 176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.800675
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.800874
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.801074
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 179
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.799509
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 180
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.801297
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 181
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.795464
Explore rate: 0.021363
Learning rate: 0.100000
Streaks: 108


Episode = 237
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.800142
Explore rate: 0.02

Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 37
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.801899
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.802098
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 39
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.798351
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.803705
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.803705
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.803901
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 43
Action: 0
State: (0, 0, 2, 2)


Episode = 238
t = 97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.805816
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.805816
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 99
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.804384
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.804411
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.804411
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.804607
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 103
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.799792
Explore rate: 0.01954


Episode = 238
t = 157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.806330
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.806330
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.806523
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.806717
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 161
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.805438
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 162
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.806958
Explore rate: 0.019542
Learning rate: 0.100000
Streaks: 109


Episode = 238
t = 163
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.801415
Explore rate: 0.01


Episode = 239
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.808757
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.808757
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.808949
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.807527
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.809025
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.809025
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.809216
Explore rate: 0.017729
Le

Episode = 239
t = 73
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.810041
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.811864
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.811864
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.812052
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 77
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.799798
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.811906
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 79
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.801197
Explore rate: 0.017729
Lea


Episode = 239
t = 133
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.811616
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 134
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.811616
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 135
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.811805
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 136
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.811090
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 137
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.812092
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 138
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.811379
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 139
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.812209
Explore rate: 0.01


Episode = 239
t = 194
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.813301
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.813301
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.813488
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 197
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.807765
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.814947
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110


Episode = 239
t = 199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.814947
Explore rate: 0.017729
Learning rate: 0.100000
Streaks: 110

Episode 239 finished after 199.000000 time steps

Episode = 240
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward

Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.815540
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 55
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.815255
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 56
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.815862
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.815862
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 58
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.815500
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 59
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.815186
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 60
Action: 1
State: (0, 0, 3, 1)


Episode = 240
t = 112
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.817421
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 113
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.816680
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 114
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.817530
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 115
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.816943
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 116
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.816943
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 117
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.817126
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 118
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.816539
Explore rate: 0.01

Reward: 1.000000
Best Q: 99.819179
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 173
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.818411
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.819229
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.819229
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 176
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.818328
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.819284
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode = 240
t = 178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.819284
Explore rate: 0.015923
Learning rate: 0.100000
Streaks: 111


Episode =


Episode = 241
t = 33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.821113
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 34
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.820605
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.820342
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.820342
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 37
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.820522
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 38
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.819514
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 39
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.820763
Explore rate: 0.014125
Le

State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.821780
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.822322
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 95
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.822322
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.822500
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 97
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.821750
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 98
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.822763
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 99
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.816458
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 1


Episode = 241
t = 152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.823932
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.824108
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.824284
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 155
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.822825
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 156
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.824473
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 157
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.819214
Explore rate: 0.014125
Learning rate: 0.100000
Streaks: 112


Episode = 241
t = 158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.824097
Explore rate: 0.01


Episode = 242
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.825901
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.824924
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.826136
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.826136
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.826310
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.825223
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.826135
Explore rate: 0.012334
Le

Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.827702
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 73
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.826504
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.828465
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.828465
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.828637
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 77
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.827301
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 78
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.828830
Explore rate: 0.012334
Learning rate: 0.100000



Episode = 242
t = 132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.828765
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 133
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.828936
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 134
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.827626
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 135
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.829131
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 136
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.827947
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.830471
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.830471
Explore rate: 0.01

Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.830871
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 193
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.830431
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 194
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.830431
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.830601
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 196
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.829901
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 197
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.832203
Explore rate: 0.012334
Learning rate: 0.100000
Streaks: 113


Episode = 242
t = 198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.832203
Explore rate: 0.012334
Learning rate: 0.1


Episode = 243
t = 52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.833168
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.833168
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.833335
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.833502
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 56
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.832646
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 57
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.833733
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 58
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.828215
Explore rate: 0.010550
Le


Episode = 243
t = 111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.834218
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 112
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.833641
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 113
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.834476
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 114
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.833890
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 115
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.834584
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 116
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.827827
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.834065
Explore rate: 0.01


Episode = 243
t = 171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.836314
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 172
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.834956
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 173
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.835711
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.835711
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.835875
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.836039
Explore rate: 0.010550
Learning rate: 0.100000
Streaks: 114


Episode = 243
t = 177
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.835313
Explore rate: 0.01

Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.837444
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.837607
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 33
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.837470
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 34
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.837902
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.837675
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.837245
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 37
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best 

Streaks: 115


Episode = 244
t = 91
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.838229
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.838229
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.838391
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 94
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.837345
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 95
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.838595
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 96
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.837631
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 97
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.839402
Explore rat

Streaks: 115


Episode = 244
t = 150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.840133
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.840133
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.840293
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 153
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.839977
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 154
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.840565
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 155
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.840195
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 115


Episode = 244
t = 156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.840439
Expl


Episode = 245
t = 10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.841608
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.841240
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.841704
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.841704
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.841862
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 15
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.841009
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.841873
Explore rate: 0.010000
Le



Episode = 245
t = 72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.844031
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.844031
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.844187
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 75
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.843088
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 76
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.844374
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.843372
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.843487
Explore rate: 0.010000
L


Episode = 245
t = 133
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.844960
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 134
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.846317
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 135
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.844815
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.844815
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.844970
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.845125
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.843665
Explore rate: 0.01


Episode = 245
t = 194
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.846836
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.846836
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.846989
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.847142
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 198
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.846316
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116


Episode = 245
t = 199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.846497
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 116

Episode 245 finished after 199.000000 time steps

Episode = 246
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward


Episode = 246
t = 54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.848585
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.848585
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.848737
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 57
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.847785
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.849620
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.849620
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.849771
Explore rate: 0.010000
Le


Episode = 246
t = 116
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.850637
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 117
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.850637
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.850787
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 119
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.849934
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 120
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.850986
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 121
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.843933
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.850891
Explore rate: 0.01


Episode = 246
t = 176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.851734
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 177
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.851404
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 178
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.851983
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 179
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.846283
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.852225
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.852225
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 117


Episode = 246
t = 182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.852372
Explore rate: 0.01


Episode = 247
t = 37
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.852569
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 38
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.853764
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.852835
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.853112
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.853112
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.853259
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.852615
Explore rate: 0.010000
Le


Episode = 247
t = 99
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.853795
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 100
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.854373
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 101
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.853998
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 102
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.854481
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 103
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.850958
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.854710
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.854710
Explore rate: 0.010


Episode = 247
t = 161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.856284
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.856428
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 163
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.855883
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 164
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.856647
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 165
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.856103
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.855671
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 118


Episode = 247
t = 167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.855671
Explore rate: 0.01

Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.856871
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 21
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.849398
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 22
Action: 1
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.849398
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 23
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.857202
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.857202
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 25
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.857345
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 26
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.856700
Explore rate: 0.010000
Learning rate: 0.100000


Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 79
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.858212
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 80
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.858175
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 81
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.858478
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 82
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.858347
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.858623
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.858623
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 85
Action: 1
State: (0, 0, 3, 0)

Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 138
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.859589
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.859362
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.859362
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.859503
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 142
Action: 1
State: (0, 0, 2, 0)
Reward: 1.000000
Best Q: 99.856996
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.860150
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.00000

Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 196
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.861130
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 197
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.861130
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 198
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.860226
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119


Episode = 248
t = 199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.861378
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 119

Episode 248 finished after 199.000000 time steps

Episode = 249
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.861304
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.860480
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 2
Ac

Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 57
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.862412
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 58
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.863553
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.862662
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.862321
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 61
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.862321
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.862459
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 63
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best 


Episode = 249
t = 117
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.858551
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.864804
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 119
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.864804
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.864939
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.865074
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 122
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.864018
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 123
Action: 0
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.865226
Explore rate: 0.01


Episode = 249
t = 178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.000000
Best Q: 99.864465
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 179
Action: 1
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.865134
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 180
Action: 0
State: (0, 0, 3, 2)
Reward: 1.000000
Best Q: 99.860661
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.865761
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.865761
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.865895
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 120


Episode = 249
t = 184
Action: 1
State: (0, 0, 3, 0)
Reward: 1.000000
Best Q: 99.865044
Explore rate: 0.01

Streaks: 121


Episode = 288
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.866855
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 289
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.866988
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 290
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.867121
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 291
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.867254
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 292
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.869672
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 293
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.867387
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 294
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.869802
Explore rate: 0.01

Streaks: 121


Episode = 348
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.871694
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 349
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.871822
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 350
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.871950
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 351
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.872078
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 352
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.872637
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 353
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.872764
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 354
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.872891
Explore rate: 0.01


Episode = 407
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.876122
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 408
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.876245
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 409
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.876369
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 410
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.876493
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 411
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.876616
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 412
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.875691
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 413
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.875815
Explore rate: 0.010000
Learning 


Episode = 468
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.879607
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 469
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.879664
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 470
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.879785
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 471
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.879728
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 472
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.879905
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 473
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.880025
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 474
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.880145
Explore rate: 0.010000
Learning 


Episode = 527
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.883284
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 528
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.882988
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 529
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.883401
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 530
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.883517
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 531
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.883105
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 532
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.883634
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 533
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.883222
Explore rate: 0.010000
Learning 


Episode = 586
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.886335
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 587
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.886448
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 588
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.886562
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 589
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.886675
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 590
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.886789
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 591
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.886902
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 592
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.886735
Explore rate: 0.010000
Learning 


Episode = 646
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.889533
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 647
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.890246
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 648
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.890356
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 649
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.890466
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 650
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.890575
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 651
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.889644
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 652
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.890684
Explore rate: 0.010000
Learning 


Episode = 707
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.893917
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 708
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.892370
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 709
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.894023
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 710
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.894129
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 711
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.892477
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 712
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.894235
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 713
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.894341
Explore rate: 0.010000
Learning 


Episode = 768
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.897670
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 769
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.897772
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 770
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.897874
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 771
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.895028
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 772
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.895133
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 773
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.897976
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 774
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.898078
Explore rate: 0.010000
Learning 


Episode = 827
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.898438
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 828
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.900123
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 829
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.898539
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 830
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.900223
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 831
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.898641
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 832
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.898742
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 833
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.898843
Explore rate: 0.010000
Learning 


Episode = 888
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.903270
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 889
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.903367
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 890
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.903463
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 891
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.901243
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 892
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.901342
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 893
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.901441
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 894
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.901539
Explore rate: 0.010000
Learning 


Episode = 948
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.904546
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 949
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.905754
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 950
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.905848
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 951
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.905942
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 952
t = 0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.000000
Best Q: 99.904642
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 953
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.906036
Explore rate: 0.010000
Learning rate: 0.100000
Streaks: 121


Episode = 954
t = 0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.000000
Best Q: 99.906130
Explore rate: 0.010000
Learning 

In [26]:
env.close()

In [27]:
print(q_table)

[[[[[ 0.          0.        ]
    [ 0.          0.        ]
    [ 0.          0.        ]]

   [[42.87897538 35.44152698]
    [42.93972189 43.80824446]
    [33.52650566 35.1392959 ]]

   [[99.86048061 99.19218741]
    [99.9071832  99.80427399]
    [99.67535042 99.86494091]]

   [[99.86541011 99.65018973]
    [99.79847777 99.90799   ]
    [99.24902526 99.86130561]]

   [[ 0.          5.08679488]
    [25.31649955  7.30859622]
    [28.81056622 28.73548103]]

   [[ 0.          0.        ]
    [ 0.          0.        ]
    [ 0.          0.        ]]]]]
