# Final Project - Reinforcements Learning 
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time

import numpy as np
import scipy as scp
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

from tqdm.notebook import tqdm
from collections import defaultdict

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embeds an mp4 file in the notebook."""
    video = open(filename,'rb').read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    for i in range(room_state.shape[0]):
        for j in range(room_state.shape[1]):
            if room_state[i][j] == 2:
                target = (i, j)

    distances = np.zeros(shape=room_state.shape)
    visited_cells = set()
    cell_queue = deque()

    visited_cells.add(target)
    cell_queue.appendleft(target)

    while len(cell_queue) != 0:
        cell = cell_queue.pop()
        distance = distances[cell[0]][cell[1]]
        for x,y in ((1,0), (-1,-0), (0,1), (0,-1)):
            next_cell_x, next_cell_y = cell[0]+x, cell[1]+y
            if room_state[next_cell_x][next_cell_y] != 0 and not (next_cell_x, next_cell_y) in visited_cells:
                distances[next_cell_x][next_cell_y] = distance + 1
                visited_cells.add((next_cell_x, next_cell_y))
                cell_queue.appendleft((next_cell_x, next_cell_y))
                
    return distances   

def calc_distances(room_state, distances):
    box = None
    mover = None
    for i in range(room_state.shape[0]):
        for j in range(room_state.shape[1]):            
            if room_state[i][j] == 4:
                box = (i,j)
            
            if room_state[i][j] == 5:
                mover = (i,j)
    
    return mover, box, distances[box[0]][box[1]]   

def box2target_change_reward(room_state, next_room_state, distances):
    if np.array_equal(room_state, next_room_state):
        return -1.0
    
    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)
    
    change_reward = 0.0
    if n_t2b < t2b:
        change_reward += 5.0
    elif n_t2b > t2b:
        change_reward -= 5.0
        
    m2b = np.sqrt((mover[0]-box[0])**2 + (mover[1]-box[1])**2)
    n_m2b = np.sqrt((n_mover[0]-n_box[0])**2 + (n_mover[1]-n_box[1])**2)
    
    if n_m2b < m2b and m2b >= 2:
        change_reward += 1.0
    elif n_m2b > m2b and n_m2b >= 2:
        change_reward -= 1.0
        
    return change_reward   

# Solution

In [8]:
class SOK_Agent:
    def __init__(self):
        # Construct DQN models
        self.state_size = (7,7,1) 
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8
        
        # Replay buffers
        self.replay_buffer = deque(maxlen=5000)
        self.prioritized_replay_buffer = deque(maxlen=500)
        
        # Hyperparameters
        self.gamma = 0.9
        self.epsilon = 1.0   
        self.epsilon_min = 0.3
        self.epsilon_decay = 0.995
        self.replay_rate = 10
        self.update_beta = 0.999

        self.verbosity = 100 

    def _build_model(self):
        model = Sequential()
        model.add(Conv2D(64, (3, 3), input_shape=self.state_size, activation='relu'))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))       
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")        
        return model

    def remember(self, state, action, reward, next_state, done):
        self.replay_buffer.append([state, action, reward, next_state, done])    
        
    def copy_to_prioritized_buffer(self, n):
        for i in range(n):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])  

    def act(self, state, stochastic=False):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        
        act_values = self.model.predict(state, verbose=0)[0]
        
        if stochastic:
            act_probs = np.exp(act_values)/np.exp(act_values).sum()
            return np.random.choice(np.arange(self.action_size), size=1, p=act_probs)[0]
              
        return np.argmax(act_values) 

    def replay(self): 
        if len(self.replay_buffer) < self.batch_size:
            return
        
        if len(self.prioritized_replay_buffer) < self.batch_size//2:
            minibatch = random.sample(self.replay_buffer, self.batch_size) 
        else:    
            minibatch = random.sample(self.replay_buffer, self.batch_size//2) 
            minibatch.extend(random.sample(self.prioritized_replay_buffer, self.batch_size//2))
        
        states = np.zeros((self.batch_size, self.state_size[0], self.state_size[1]))
        actions = np.zeros(self.batch_size, dtype=int)
        rewards = np.zeros(self.batch_size)
        next_states = np.zeros((self.batch_size, self.state_size[0], self.state_size[1]))
        statuses = np.zeros(self.batch_size)
        targets = np.zeros((self.batch_size, self.action_size)) 
        
        for i, (state, action, reward, next_state, done) in enumerate(minibatch): 
            states[i] = state.copy()
            actions[i] = action
            rewards[i] = reward
            next_states[i] = next_state.copy()
            statuses[i] = 1 if done else 0    
        
        targets = self.model.predict(states) 
        max_actions = np.argmax(self.model.predict(next_states), axis=1)
        next_rewards = self.target_model.predict(next_states)
        
        ind = 0
        for action, reward, next_reward, max_action, done in zip(actions, rewards, next_rewards, max_actions, statuses):  
            if not done:
                reward += self.gamma * next_reward[max_action]
            targets[ind][action] = reward
            ind += 1
        
        self.model.fit(states, targets, epochs=10, verbose=0) 
        
        self.update_target_model()        
    
        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay
        
    def update_target_model(self):
        model_w = self.model.get_weights()
        target_model_w = self.target_model.get_weights()
        updated_target_model_w = []
        for i in range(len(model_w)):
            updated_target_model_w.append(self.update_beta*target_model_w[i] + (1-self.update_beta)*model_w[i])
        self.target_model.set_weights(updated_target_model_w)    
            
    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [9]:
def process_frame(frame):
    f = frame[:, :, 0]   
    f = f.reshape(7, 16, 7, 16).max(axis=(1, 3))
    #f = f.flatten()
    f = f / 255
    return np.expand_dims(f, axis=0)

## Training

#### Test Suite

In [10]:
def test_agent(stochastic=False):
    current_epsilon = agent.epsilon
    agent.epsilon = 0.0
    num_solved = 0
    solved_in_steps = defaultdict(int)

    for t in tqdm(range(100)):    
        random.seed(t)
        sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
        sok.set_maxsteps(20)
        steps = 0

        state = sok.get_image('rgb_array')
        done = False
        while not done:
            steps += 1
            action = agent.act(process_frame(state), stochastic)
            if action < 4:
                action += 1
            else:
                action += 5
            state, reward, done, info = sok.step(action)

        if 3 in sok.room_state:            
            num_solved += 1
            solved_in_steps[steps] += 1
    
    agent.epsilon = current_epsilon
    print("*" * 30)
    print("Stochastic" if stochastic else "Deterministic")
    print("*" * 30)
    print("Solved: %d" % num_solved)
    print("=" * 30)
    print(solved_in_steps)
    print("*" * 30)

In [11]:
max_episodes = 50000
max_steps = 25

def init_sok(r):
    random.seed(r+100)
    sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    sok.set_maxsteps(max_steps)
    return sok

In [None]:
agent = SOK_Agent()

steps_per_episode = []

for e in range(max_episodes):
    if e % 100 == 0 and e > 0:
        test_agent(stochastic=False)
        
    print("Episode: %d" % (e))
    
    sok = init_sok(e)
    random.seed(e)
    
    state = process_frame(sok.get_image('rgb_array'))
    room_state = sok.room_state.copy() 
    distances = get_distances(room_state)
    
    for step in range(sok.max_steps):
        action = agent.act(state)
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1) 
        else:
            next_state, reward, done, _ = sok.step(action+5)         
        
        next_state = process_frame(next_state)        
        next_room_state = sok.room_state
        
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)
        
        agent.remember(state, action, reward, next_state, done)
        
        state = next_state.copy() 
        room_state = next_room_state.copy()                
        
        if (step+1) % agent.replay_rate == 0:
            agent.replay()            
        
        if done:   
            steps_per_episode.append(step+1)
            
            if 3 in sok.room_state:                
                print("SOLVED! Episode %d Steps: %d Epsilon %.4f" % (e, step+1, agent.epsilon)) 
                agent.copy_to_prioritized_buffer(step+1)
            
            #agent.save("exp1_episode%d.h5" % (e))            
            break

Episode: 0
Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
Episode: 7
SOLVED! Episode 7 Steps: 6 Epsilon 0.9322
Episode: 8
Episode: 9
Episode: 10
SOLVED! Episode 10 Steps: 12 Epsilon 0.9092
Episode: 11
Episode: 12
Episode: 13
SOLVED! Episode 13 Steps: 7 Epsilon 0.8911
Episode: 14
Episode: 15
Episode: 16
SOLVED! Episode 16 Steps: 19 Epsilon 0.8691
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
SOLVED! Episode 22 Steps: 13 Epsilon 0.8224
Episode: 23
Episode: 24
Episode: 25
Episode: 26
SOLVED! Episode 26 Steps: 4 Epsilon 0.7981
Episode: 27
Episode: 28
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
SOLVED! Episode 41 Steps: 3 Epsilon 0.6936
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
SOLVED! Episode 46 Steps: 3 Epsilon 0.6663
Episode: 47
SOLVED! Episode 47 Steps: 3 Epsilon 0.6663
Episode: 48
SOLVED! Episode 48 Steps: 1 Epsil

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 24
defaultdict(<class 'int'>, {3: 9, 1: 10, 2: 5})
******************************
Episode: 100
Episode: 101
Episode: 102
SOLVED! Episode 102 Steps: 18 Epsilon 0.4244
Episode: 103
SOLVED! Episode 103 Steps: 5 Epsilon 0.4244
Episode: 104
Episode: 105
Episode: 106
Episode: 107
SOLVED! Episode 107 Steps: 3 Epsilon 0.4118
Episode: 108
Episode: 109
Episode: 110
Episode: 111
Episode: 112
Episode: 113
Episode: 114
SOLVED! Episode 114 Steps: 1 Epsilon 0.3878
Episode: 115
Episode: 116
Episode: 117
Episode: 118
SOLVED! Episode 118 Steps: 1 Epsilon 0.3763
Episode: 119
Episode: 120
Episode: 121
Episode: 122
Episode: 123
Episode: 124
SOLVED! Episode 124 Steps: 3 Epsilon 0.3579
Episode: 125
Episode: 126
Episode: 127
Episode: 128
SOLVED! Episode 128 Steps: 2 Epsilon 0.3473
Episode: 129
Episode: 130
Episode: 131
Episode: 132
Episode: 133
SOLVED! Episode 133 Steps: 23 Epsilon 0.3303
Episode: 134
Episode: 135
SOLVED! Epi

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 28
defaultdict(<class 'int'>, {3: 13, 1: 9, 2: 6})
******************************
Episode: 200
SOLVED! Episode 200 Steps: 15 Epsilon 0.2988
Episode: 201
Episode: 202
Episode: 203
Episode: 204
Episode: 205
Episode: 206
Episode: 207
Episode: 208
Episode: 209
SOLVED! Episode 209 Steps: 9 Epsilon 0.2988
Episode: 210
Episode: 211
Episode: 212
Episode: 213
SOLVED! Episode 213 Steps: 3 Epsilon 0.2988
Episode: 214
SOLVED! Episode 214 Steps: 3 Epsilon 0.2988
Episode: 215
Episode: 216
SOLVED! Episode 216 Steps: 3 Epsilon 0.2988
Episode: 217
Episode: 218
SOLVED! Episode 218 Steps: 2 Epsilon 0.2988
Episode: 219
Episode: 220
SOLVED! Episode 220 Steps: 6 Epsilon 0.2988
Episode: 221
SOLVED! Episode 221 Steps: 1 Epsilon 0.2988
Episode: 222
Episode: 223
SOLVED! Episode 223 Steps: 1 Epsilon 0.2988
Episode: 224
Episode: 225
Episode: 226
Episode: 227
Episode: 228
Episode: 229
Episode: 230
SOLVED! Episode 230 Steps: 23 Eps

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 16, 2: 4, 1: 10})
******************************
Episode: 300
SOLVED! Episode 300 Steps: 1 Epsilon 0.2988
Episode: 301
Episode: 302
Episode: 303
Episode: 304
Episode: 305
SOLVED! Episode 305 Steps: 2 Epsilon 0.2988
Episode: 306
Episode: 307
Episode: 308
Episode: 309
Episode: 310
Episode: 311
Episode: 312
Episode: 313
Episode: 314
Episode: 315
SOLVED! Episode 315 Steps: 3 Epsilon 0.2988
Episode: 316
Episode: 317
Episode: 318
Episode: 319
SOLVED! Episode 319 Steps: 3 Epsilon 0.2988
Episode: 320
Episode: 321
Episode: 322
SOLVED! Episode 322 Steps: 1 Epsilon 0.2988
Episode: 323
SOLVED! Episode 323 Steps: 3 Epsilon 0.2988
Episode: 324
Episode: 325
Episode: 326
Episode: 327
SOLVED! Episode 327 Steps: 2 Epsilon 0.2988
Episode: 328
Episode: 329
Episode: 330
SOLVED! Episode 330 Steps: 2 Epsilon 0.2988
Episode: 331
Episode: 332
Episode: 333
Episode: 334
SOLVED! Episode 334 Steps

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 32
defaultdict(<class 'int'>, {3: 16, 2: 6, 1: 10})
******************************
Episode: 400
Episode: 401
Episode: 402
Episode: 403
Episode: 404
SOLVED! Episode 404 Steps: 3 Epsilon 0.2988
Episode: 405
SOLVED! Episode 405 Steps: 1 Epsilon 0.2988
Episode: 406
Episode: 407
Episode: 408
Episode: 409
Episode: 410
Episode: 411
Episode: 412
Episode: 413
SOLVED! Episode 413 Steps: 3 Epsilon 0.2988
Episode: 414
Episode: 415
Episode: 416
SOLVED! Episode 416 Steps: 3 Epsilon 0.2988
Episode: 417
Episode: 418
SOLVED! Episode 418 Steps: 3 Epsilon 0.2988
Episode: 419
Episode: 420
Episode: 421
Episode: 422
Episode: 423
Episode: 424
Episode: 425
Episode: 426
Episode: 427
SOLVED! Episode 427 Steps: 3 Epsilon 0.2988
Episode: 428
Episode: 429
SOLVED! Episode 429 Steps: 6 Epsilon 0.2988
Episode: 430
Episode: 431
Episode: 432
Episode: 433
Episode: 434
Episode: 435
SOLVED! Episode 435 Steps: 11 Epsilon 0.2988
Episode: 43

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 33
defaultdict(<class 'int'>, {3: 18, 1: 11, 2: 4})
******************************
Episode: 500
SOLVED! Episode 500 Steps: 2 Epsilon 0.2988
Episode: 501
Episode: 502
Episode: 503
SOLVED! Episode 503 Steps: 3 Epsilon 0.2988
Episode: 504
Episode: 505
SOLVED! Episode 505 Steps: 3 Epsilon 0.2988
Episode: 506
Episode: 507
Episode: 508
Episode: 509
Episode: 510
Episode: 511
SOLVED! Episode 511 Steps: 2 Epsilon 0.2988
Episode: 512
SOLVED! Episode 512 Steps: 6 Epsilon 0.2988
Episode: 513
Episode: 514
Episode: 515
Episode: 516
Episode: 517
SOLVED! Episode 517 Steps: 1 Epsilon 0.2988
Episode: 518
Episode: 519
SOLVED! Episode 519 Steps: 2 Epsilon 0.2988
Episode: 520
SOLVED! Episode 520 Steps: 1 Epsilon 0.2988
Episode: 521
Episode: 522
SOLVED! Episode 522 Steps: 3 Epsilon 0.2988
Episode: 523
SOLVED! Episode 523 Steps: 3 Epsilon 0.2988
Episode: 524
SOLVED! Episode 524 Steps: 3 Epsilon 0.2988
Episode: 525
Episode: 5

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 36
defaultdict(<class 'int'>, {3: 17, 1: 13, 2: 6})
******************************
Episode: 600
SOLVED! Episode 600 Steps: 3 Epsilon 0.2988
Episode: 601
Episode: 602
Episode: 603
SOLVED! Episode 603 Steps: 2 Epsilon 0.2988
Episode: 604
Episode: 605
Episode: 606
Episode: 607
SOLVED! Episode 607 Steps: 1 Epsilon 0.2988
Episode: 608
SOLVED! Episode 608 Steps: 1 Epsilon 0.2988
Episode: 609
Episode: 610
Episode: 611
SOLVED! Episode 611 Steps: 15 Epsilon 0.2988
Episode: 612
Episode: 613
SOLVED! Episode 613 Steps: 1 Epsilon 0.2988
Episode: 614
Episode: 615
SOLVED! Episode 615 Steps: 2 Epsilon 0.2988
Episode: 616
Episode: 617
Episode: 618
Episode: 619
Episode: 620
SOLVED! Episode 620 Steps: 3 Epsilon 0.2988
Episode: 621
Episode: 622
Episode: 623
Episode: 624
Episode: 625
SOLVED! Episode 625 Steps: 2 Epsilon 0.2988
Episode: 626
Episode: 627
Episode: 628
SOLVED! Episode 628 Steps: 3 Epsilon 0.2988
Episode: 629
E

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 35
defaultdict(<class 'int'>, {3: 18, 2: 6, 1: 11})
******************************
Episode: 700
Episode: 701
Episode: 702
SOLVED! Episode 702 Steps: 1 Epsilon 0.2988
Episode: 703
Episode: 704
Episode: 705
SOLVED! Episode 705 Steps: 1 Epsilon 0.2988
Episode: 706
Episode: 707
Episode: 708
Episode: 709
Episode: 710
SOLVED! Episode 710 Steps: 1 Epsilon 0.2988
Episode: 711
SOLVED! Episode 711 Steps: 3 Epsilon 0.2988
Episode: 712
Episode: 713
Episode: 714
Episode: 715
Episode: 716
Episode: 717
Episode: 718
SOLVED! Episode 718 Steps: 3 Epsilon 0.2988
Episode: 719
Episode: 720
Episode: 721
SOLVED! Episode 721 Steps: 2 Epsilon 0.2988
Episode: 722
Episode: 723
Episode: 724
Episode: 725
SOLVED! Episode 725 Steps: 3 Epsilon 0.2988
Episode: 726
Episode: 727
Episode: 728
SOLVED! Episode 728 Steps: 1 Epsilon 0.2988
Episode: 729
Episode: 730
Episode: 731
Episode: 732
SOLVED! Episode 732 Steps: 3 Epsilon 0.2988
Episode

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 33
defaultdict(<class 'int'>, {3: 14, 1: 14, 2: 5})
******************************
Episode: 800
SOLVED! Episode 800 Steps: 2 Epsilon 0.2988
Episode: 801
Episode: 802
Episode: 803
SOLVED! Episode 803 Steps: 23 Epsilon 0.2988
Episode: 804
Episode: 805
SOLVED! Episode 805 Steps: 3 Epsilon 0.2988
Episode: 806
SOLVED! Episode 806 Steps: 3 Epsilon 0.2988
Episode: 807
Episode: 808
Episode: 809
Episode: 810
SOLVED! Episode 810 Steps: 3 Epsilon 0.2988
Episode: 811
Episode: 812
Episode: 813
Episode: 814
Episode: 815
Episode: 816
Episode: 817
Episode: 818
Episode: 819
Episode: 820
Episode: 821
Episode: 822
Episode: 823
SOLVED! Episode 823 Steps: 2 Epsilon 0.2988
Episode: 824
Episode: 825
Episode: 826
Episode: 827
SOLVED! Episode 827 Steps: 3 Epsilon 0.2988
Episode: 828
SOLVED! Episode 828 Steps: 2 Epsilon 0.2988
Episode: 829
Episode: 830
Episode: 831
Episode: 832
SOLVED! Episode 832 Steps: 1 Epsilon 0.2988
Episod

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 41
defaultdict(<class 'int'>, {3: 21, 1: 13, 2: 7})
******************************
Episode: 900
Episode: 901
Episode: 902
SOLVED! Episode 902 Steps: 3 Epsilon 0.2988
Episode: 903
Episode: 904
Episode: 905
Episode: 906
Episode: 907
Episode: 908
Episode: 909
SOLVED! Episode 909 Steps: 1 Epsilon 0.2988
Episode: 910
Episode: 911
Episode: 912
Episode: 913
Episode: 914
Episode: 915
Episode: 916
Episode: 917
Episode: 918
Episode: 919
Episode: 920
SOLVED! Episode 920 Steps: 2 Epsilon 0.2988
Episode: 921
SOLVED! Episode 921 Steps: 3 Epsilon 0.2988
Episode: 922
SOLVED! Episode 922 Steps: 3 Epsilon 0.2988
Episode: 923
Episode: 924
Episode: 925
Episode: 926
SOLVED! Episode 926 Steps: 1 Epsilon 0.2988
Episode: 927
Episode: 928
Episode: 929
Episode: 930
Episode: 931
Episode: 932
Episode: 933
Episode: 934
SOLVED! Episode 934 Steps: 2 Epsilon 0.2988
Episode: 935
Episode: 936
Episode: 937
SOLVED! Episode 937 Steps: 2 E

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 40
defaultdict(<class 'int'>, {3: 21, 1: 13, 2: 6})
******************************
Episode: 1000
SOLVED! Episode 1000 Steps: 3 Epsilon 0.2988
Episode: 1001
SOLVED! Episode 1001 Steps: 3 Epsilon 0.2988
Episode: 1002
Episode: 1003
Episode: 1004
Episode: 1005
SOLVED! Episode 1005 Steps: 2 Epsilon 0.2988
Episode: 1006
Episode: 1007
Episode: 1008
Episode: 1009
Episode: 1010
SOLVED! Episode 1010 Steps: 2 Epsilon 0.2988
Episode: 1011
Episode: 1012
Episode: 1013
SOLVED! Episode 1013 Steps: 3 Epsilon 0.2988
Episode: 1014
Episode: 1015
Episode: 1016
SOLVED! Episode 1016 Steps: 3 Epsilon 0.2988
Episode: 1017
SOLVED! Episode 1017 Steps: 2 Epsilon 0.2988
Episode: 1018
Episode: 1019
Episode: 1020
Episode: 1021
Episode: 1022
Episode: 1023
Episode: 1024
SOLVED! Episode 1024 Steps: 1 Epsilon 0.2988
Episode: 1025
Episode: 1026
Episode: 1027
Episode: 1028
SOLVED! Episode 1028 Steps: 22 Epsilon 0.2988
Episode: 1029
SOLVED

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 39
defaultdict(<class 'int'>, {3: 19, 1: 13, 2: 7})
******************************
Episode: 1100
Episode: 1101
SOLVED! Episode 1101 Steps: 1 Epsilon 0.2988
Episode: 1102
Episode: 1103
SOLVED! Episode 1103 Steps: 2 Epsilon 0.2988
Episode: 1104
Episode: 1105
SOLVED! Episode 1105 Steps: 3 Epsilon 0.2988
Episode: 1106
Episode: 1107
Episode: 1108
SOLVED! Episode 1108 Steps: 3 Epsilon 0.2988
Episode: 1109
Episode: 1110
SOLVED! Episode 1110 Steps: 3 Epsilon 0.2988
Episode: 1111
Episode: 1112
SOLVED! Episode 1112 Steps: 3 Epsilon 0.2988
Episode: 1113
Episode: 1114
Episode: 1115
Episode: 1116
Episode: 1117
Episode: 1118
SOLVED! Episode 1118 Steps: 1 Epsilon 0.2988
Episode: 1119
SOLVED! Episode 1119 Steps: 3 Epsilon 0.2988
Episode: 1120
SOLVED! Episode 1120 Steps: 1 Epsilon 0.2988
Episode: 1121
SOLVED! Episode 1121 Steps: 1 Epsilon 0.2988
Episode: 1122
Episode: 1123
Episode: 1124
Episode: 1125
Episode: 1126
Epis

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 38
defaultdict(<class 'int'>, {3: 18, 2: 8, 1: 12})
******************************
Episode: 1200
Episode: 1201
Episode: 1202
Episode: 1203
Episode: 1204
Episode: 1205
SOLVED! Episode 1205 Steps: 1 Epsilon 0.2988
Episode: 1206
Episode: 1207
Episode: 1208
Episode: 1209
Episode: 1210
SOLVED! Episode 1210 Steps: 3 Epsilon 0.2988
Episode: 1211
SOLVED! Episode 1211 Steps: 3 Epsilon 0.2988
Episode: 1212
Episode: 1213
Episode: 1214
Episode: 1215
Episode: 1216
Episode: 1217
Episode: 1218
SOLVED! Episode 1218 Steps: 7 Epsilon 0.2988
Episode: 1219
SOLVED! Episode 1219 Steps: 13 Epsilon 0.2988
Episode: 1220
Episode: 1221
SOLVED! Episode 1221 Steps: 2 Epsilon 0.2988
Episode: 1222
Episode: 1223
Episode: 1224
Episode: 1225
SOLVED! Episode 1225 Steps: 1 Epsilon 0.2988
Episode: 1226
Episode: 1227
Episode: 1228
SOLVED! Episode 1228 Steps: 1 Epsilon 0.2988
Episode: 1229
Episode: 1230
SOLVED! Episode 1230 Steps: 2 Epsilon

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 39
defaultdict(<class 'int'>, {3: 19, 1: 12, 2: 8})
******************************
Episode: 1300
Episode: 1301
Episode: 1302
Episode: 1303
Episode: 1304
SOLVED! Episode 1304 Steps: 1 Epsilon 0.2988
Episode: 1305
Episode: 1306
SOLVED! Episode 1306 Steps: 3 Epsilon 0.2988
Episode: 1307
Episode: 1308
Episode: 1309
Episode: 1310
SOLVED! Episode 1310 Steps: 3 Epsilon 0.2988
Episode: 1311
Episode: 1312
Episode: 1313
SOLVED! Episode 1313 Steps: 1 Epsilon 0.2988
Episode: 1314
Episode: 1315
Episode: 1316
Episode: 1317
Episode: 1318
Episode: 1319
SOLVED! Episode 1319 Steps: 2 Epsilon 0.2988
Episode: 1320
Episode: 1321
Episode: 1322
SOLVED! Episode 1322 Steps: 3 Epsilon 0.2988
Episode: 1323
Episode: 1324
Episode: 1325
SOLVED! Episode 1325 Steps: 2 Epsilon 0.2988
Episode: 1326
Episode: 1327
SOLVED! Episode 1327 Steps: 3 Epsilon 0.2988
Episode: 1328
Episode: 1329
Episode: 1330
SOLVED! Episode 1330 Steps: 3 Epsilon 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 44
defaultdict(<class 'int'>, {3: 20, 1: 16, 2: 8})
******************************
Episode: 1400
SOLVED! Episode 1400 Steps: 1 Epsilon 0.2988
Episode: 1401
SOLVED! Episode 1401 Steps: 1 Epsilon 0.2988
Episode: 1402
SOLVED! Episode 1402 Steps: 3 Epsilon 0.2988
Episode: 1403
SOLVED! Episode 1403 Steps: 2 Epsilon 0.2988
Episode: 1404
Episode: 1405
SOLVED! Episode 1405 Steps: 3 Epsilon 0.2988
Episode: 1406
Episode: 1407
Episode: 1408
Episode: 1409
Episode: 1410
Episode: 1411
Episode: 1412
SOLVED! Episode 1412 Steps: 1 Epsilon 0.2988
Episode: 1413
Episode: 1414
SOLVED! Episode 1414 Steps: 2 Epsilon 0.2988
Episode: 1415
SOLVED! Episode 1415 Steps: 2 Epsilon 0.2988
Episode: 1416
Episode: 1417
SOLVED! Episode 1417 Steps: 2 Epsilon 0.2988
Episode: 1418
Episode: 1419
Episode: 1420
Episode: 1421
SOLVED! Episode 1421 Steps: 2 Epsilon 0.2988
Episode: 1422
Episode: 1423
SOLVED! Episode 1423 Steps: 1 Epsilon 0.2988
E

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 42
defaultdict(<class 'int'>, {3: 20, 1: 14, 2: 8})
******************************
Episode: 1500
SOLVED! Episode 1500 Steps: 3 Epsilon 0.2988
Episode: 1501
Episode: 1502
Episode: 1503
Episode: 1504
SOLVED! Episode 1504 Steps: 2 Epsilon 0.2988
Episode: 1505
SOLVED! Episode 1505 Steps: 3 Epsilon 0.2988
Episode: 1506
Episode: 1507
Episode: 1508
SOLVED! Episode 1508 Steps: 3 Epsilon 0.2988
Episode: 1509
Episode: 1510
Episode: 1511
Episode: 1512
Episode: 1513
SOLVED! Episode 1513 Steps: 2 Epsilon 0.2988
Episode: 1514
SOLVED! Episode 1514 Steps: 1 Epsilon 0.2988
Episode: 1515
Episode: 1516
Episode: 1517
SOLVED! Episode 1517 Steps: 3 Epsilon 0.2988
Episode: 1518
SOLVED! Episode 1518 Steps: 1 Epsilon 0.2988
Episode: 1519
SOLVED! Episode 1519 Steps: 2 Epsilon 0.2988
Episode: 1520
SOLVED! Episode 1520 Steps: 3 Epsilon 0.2988
Episode: 1521
Episode: 1522
Episode: 1523
Episode: 1524
Episode: 1525
Episode: 1526
SOLV

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 43
defaultdict(<class 'int'>, {3: 21, 1: 14, 2: 8})
******************************
Episode: 1600
SOLVED! Episode 1600 Steps: 2 Epsilon 0.2988
Episode: 1601
SOLVED! Episode 1601 Steps: 18 Epsilon 0.2988
Episode: 1602
SOLVED! Episode 1602 Steps: 3 Epsilon 0.2988
Episode: 1603
SOLVED! Episode 1603 Steps: 3 Epsilon 0.2988
Episode: 1604
Episode: 1605
Episode: 1606
Episode: 1607
SOLVED! Episode 1607 Steps: 3 Epsilon 0.2988
Episode: 1608
Episode: 1609
SOLVED! Episode 1609 Steps: 3 Epsilon 0.2988
Episode: 1610
Episode: 1611
Episode: 1612
SOLVED! Episode 1612 Steps: 13 Epsilon 0.2988
Episode: 1613
Episode: 1614
SOLVED! Episode 1614 Steps: 3 Epsilon 0.2988
Episode: 1615
SOLVED! Episode 1615 Steps: 19 Epsilon 0.2988
Episode: 1616
Episode: 1617
Episode: 1618
SOLVED! Episode 1618 Steps: 3 Epsilon 0.2988
Episode: 1619
SOLVED! Episode 1619 Steps: 6 Epsilon 0.2988
Episode: 1620
Episode: 1621
Episode: 1622
SOLVED! Epis

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 42
defaultdict(<class 'int'>, {3: 20, 1: 14, 2: 8})
******************************
Episode: 1700
Episode: 1701
Episode: 1702
Episode: 1703
Episode: 1704
SOLVED! Episode 1704 Steps: 1 Epsilon 0.2988
Episode: 1705
Episode: 1706
Episode: 1707
SOLVED! Episode 1707 Steps: 1 Epsilon 0.2988
Episode: 1708
Episode: 1709
SOLVED! Episode 1709 Steps: 3 Epsilon 0.2988
Episode: 1710
Episode: 1711
SOLVED! Episode 1711 Steps: 2 Epsilon 0.2988
Episode: 1712
Episode: 1713
Episode: 1714
Episode: 1715
SOLVED! Episode 1715 Steps: 3 Epsilon 0.2988
Episode: 1716
SOLVED! Episode 1716 Steps: 3 Epsilon 0.2988
Episode: 1717
Episode: 1718
SOLVED! Episode 1718 Steps: 2 Epsilon 0.2988
Episode: 1719
Episode: 1720
SOLVED! Episode 1720 Steps: 2 Epsilon 0.2988
Episode: 1721
Episode: 1722
SOLVED! Episode 1722 Steps: 3 Epsilon 0.2988
Episode: 1723
SOLVED! Episode 1723 Steps: 1 Epsilon 0.2988
Episode: 1724
SOLVED! Episode 1724 Steps: 3 Ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 41
defaultdict(<class 'int'>, {3: 20, 1: 12, 2: 9})
******************************
Episode: 1800
SOLVED! Episode 1800 Steps: 1 Epsilon 0.2988
Episode: 1801
Episode: 1802
Episode: 1803
Episode: 1804
Episode: 1805
Episode: 1806
Episode: 1807
Episode: 1808
SOLVED! Episode 1808 Steps: 3 Epsilon 0.2988
Episode: 1809
Episode: 1810
Episode: 1811
Episode: 1812
SOLVED! Episode 1812 Steps: 2 Epsilon 0.2988
Episode: 1813
SOLVED! Episode 1813 Steps: 3 Epsilon 0.2988
Episode: 1814
Episode: 1815
Episode: 1816
SOLVED! Episode 1816 Steps: 7 Epsilon 0.2988
Episode: 1817
SOLVED! Episode 1817 Steps: 2 Epsilon 0.2988
Episode: 1818
Episode: 1819
SOLVED! Episode 1819 Steps: 2 Epsilon 0.2988
Episode: 1820
SOLVED! Episode 1820 Steps: 3 Epsilon 0.2988
Episode: 1821
Episode: 1822
Episode: 1823
Episode: 1824
Episode: 1825
Episode: 1826
Episode: 1827
Episode: 1828
Episode: 1829
Episode: 1830
SOLVED! Episode 1830 Steps: 3 Epsilon 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 43
defaultdict(<class 'int'>, {3: 18, 1: 15, 2: 10})
******************************
Episode: 1900
Episode: 1901
Episode: 1902
SOLVED! Episode 1902 Steps: 1 Epsilon 0.2988
Episode: 1903
SOLVED! Episode 1903 Steps: 3 Epsilon 0.2988
Episode: 1904
Episode: 1905
Episode: 1906
SOLVED! Episode 1906 Steps: 3 Epsilon 0.2988
Episode: 1907
Episode: 1908
Episode: 1909
Episode: 1910
SOLVED! Episode 1910 Steps: 1 Epsilon 0.2988
Episode: 1911
Episode: 1912
Episode: 1913
Episode: 1914
Episode: 1915
SOLVED! Episode 1915 Steps: 1 Epsilon 0.2988
Episode: 1916
SOLVED! Episode 1916 Steps: 2 Epsilon 0.2988
Episode: 1917
Episode: 1918
Episode: 1919
Episode: 1920
Episode: 1921
Episode: 1922
Episode: 1923
Episode: 1924
Episode: 1925
Episode: 1926
SOLVED! Episode 1926 Steps: 3 Epsilon 0.2988
Episode: 1927
Episode: 1928
SOLVED! Episode 1928 Steps: 1 Epsilon 0.2988
Episode: 1929
Episode: 1930
Episode: 1931
Episode: 1932
Episode: 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 45
defaultdict(<class 'int'>, {3: 20, 1: 16, 2: 9})
******************************
Episode: 2000
Episode: 2001
SOLVED! Episode 2001 Steps: 3 Epsilon 0.2988
Episode: 2002
Episode: 2003
Episode: 2004
SOLVED! Episode 2004 Steps: 1 Epsilon 0.2988
Episode: 2005
Episode: 2006
Episode: 2007
SOLVED! Episode 2007 Steps: 3 Epsilon 0.2988
Episode: 2008
Episode: 2009
Episode: 2010
SOLVED! Episode 2010 Steps: 3 Epsilon 0.2988
Episode: 2011
Episode: 2012
Episode: 2013
Episode: 2014
SOLVED! Episode 2014 Steps: 3 Epsilon 0.2988
Episode: 2015
Episode: 2016
Episode: 2017
SOLVED! Episode 2017 Steps: 1 Epsilon 0.2988
Episode: 2018
Episode: 2019
Episode: 2020
Episode: 2021
SOLVED! Episode 2021 Steps: 3 Epsilon 0.2988
Episode: 2022
SOLVED! Episode 2022 Steps: 3 Epsilon 0.2988
Episode: 2023
Episode: 2024
Episode: 2025
Episode: 2026
Episode: 2027
SOLVED! Episode 2027 Steps: 10 Epsilon 0.2988
Episode: 2028
Episode: 2029
SOLVED

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 46
defaultdict(<class 'int'>, {3: 21, 1: 16, 2: 9})
******************************
Episode: 2100
Episode: 2101
Episode: 2102
SOLVED! Episode 2102 Steps: 3 Epsilon 0.2988
Episode: 2103
SOLVED! Episode 2103 Steps: 1 Epsilon 0.2988
Episode: 2104
SOLVED! Episode 2104 Steps: 3 Epsilon 0.2988
Episode: 2105
SOLVED! Episode 2105 Steps: 3 Epsilon 0.2988
Episode: 2106
Episode: 2107
Episode: 2108
Episode: 2109
SOLVED! Episode 2109 Steps: 1 Epsilon 0.2988
Episode: 2110
Episode: 2111
Episode: 2112
SOLVED! Episode 2112 Steps: 2 Epsilon 0.2988
Episode: 2113
SOLVED! Episode 2113 Steps: 2 Epsilon 0.2988
Episode: 2114
Episode: 2115
Episode: 2116
Episode: 2117
Episode: 2118
Episode: 2119
Episode: 2120
Episode: 2121
Episode: 2122
SOLVED! Episode 2122 Steps: 3 Epsilon 0.2988
Episode: 2123
Episode: 2124
Episode: 2125
Episode: 2126
SOLVED! Episode 2126 Steps: 2 Epsilon 0.2988
Episode: 2127
Episode: 2128
Episode: 2129
Episode

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 41
defaultdict(<class 'int'>, {3: 19, 1: 14, 2: 8})
******************************
Episode: 2200
SOLVED! Episode 2200 Steps: 1 Epsilon 0.2988
Episode: 2201
Episode: 2202
Episode: 2203
Episode: 2204
Episode: 2205
Episode: 2206
SOLVED! Episode 2206 Steps: 3 Epsilon 0.2988
Episode: 2207
SOLVED! Episode 2207 Steps: 3 Epsilon 0.2988
Episode: 2208
Episode: 2209
Episode: 2210
SOLVED! Episode 2210 Steps: 3 Epsilon 0.2988
Episode: 2211
Episode: 2212
Episode: 2213
Episode: 2214
Episode: 2215
Episode: 2216
Episode: 2217
Episode: 2218
SOLVED! Episode 2218 Steps: 2 Epsilon 0.2988
Episode: 2219
Episode: 2220
Episode: 2221
Episode: 2222
Episode: 2223
Episode: 2224
SOLVED! Episode 2224 Steps: 1 Epsilon 0.2988
Episode: 2225
Episode: 2226
SOLVED! Episode 2226 Steps: 25 Epsilon 0.2988
Episode: 2227
SOLVED! Episode 2227 Steps: 1 Epsilon 0.2988
Episode: 2228
Episode: 2229
SOLVED! Episode 2229 Steps: 1 Epsilon 0.2988
Episod

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 46
defaultdict(<class 'int'>, {3: 21, 1: 16, 2: 9})
******************************
Episode: 2300
SOLVED! Episode 2300 Steps: 1 Epsilon 0.2988
Episode: 2301
Episode: 2302
Episode: 2303
Episode: 2304
Episode: 2305
SOLVED! Episode 2305 Steps: 2 Epsilon 0.2988
Episode: 2306
Episode: 2307
SOLVED! Episode 2307 Steps: 3 Epsilon 0.2988
Episode: 2308
Episode: 2309
Episode: 2310
Episode: 2311
SOLVED! Episode 2311 Steps: 1 Epsilon 0.2988
Episode: 2312
Episode: 2313
Episode: 2314
Episode: 2315
SOLVED! Episode 2315 Steps: 2 Epsilon 0.2988
Episode: 2316
SOLVED! Episode 2316 Steps: 1 Epsilon 0.2988
Episode: 2317
Episode: 2318
Episode: 2319
Episode: 2320
SOLVED! Episode 2320 Steps: 3 Epsilon 0.2988
Episode: 2321
SOLVED! Episode 2321 Steps: 3 Epsilon 0.2988
Episode: 2322
Episode: 2323
SOLVED! Episode 2323 Steps: 1 Epsilon 0.2988
Episode: 2324
SOLVED! Episode 2324 Steps: 3 Epsilon 0.2988
Episode: 2325
Episode: 2326
Epis

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 44
defaultdict(<class 'int'>, {3: 21, 1: 16, 2: 7})
******************************
Episode: 2400
SOLVED! Episode 2400 Steps: 3 Epsilon 0.2988
Episode: 2401
Episode: 2402
Episode: 2403
Episode: 2404
Episode: 2405
Episode: 2406
Episode: 2407
Episode: 2408
Episode: 2409
Episode: 2410
SOLVED! Episode 2410 Steps: 1 Epsilon 0.2988
Episode: 2411
SOLVED! Episode 2411 Steps: 1 Epsilon 0.2988
Episode: 2412
SOLVED! Episode 2412 Steps: 1 Epsilon 0.2988
Episode: 2413
Episode: 2414
SOLVED! Episode 2414 Steps: 3 Epsilon 0.2988
Episode: 2415
SOLVED! Episode 2415 Steps: 3 Epsilon 0.2988
Episode: 2416
Episode: 2417
SOLVED! Episode 2417 Steps: 2 Epsilon 0.2988
Episode: 2418
SOLVED! Episode 2418 Steps: 2 Epsilon 0.2988
Episode: 2419
Episode: 2420
Episode: 2421
SOLVED! Episode 2421 Steps: 7 Epsilon 0.2988
Episode: 2422
SOLVED! Episode 2422 Steps: 3 Epsilon 0.2988
Episode: 2423
SOLVED! Episode 2423 Steps: 1 Epsilon 0.2988
E

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 44
defaultdict(<class 'int'>, {3: 22, 1: 15, 2: 7})
******************************
Episode: 2500
Episode: 2501
SOLVED! Episode 2501 Steps: 3 Epsilon 0.2988
Episode: 2502
Episode: 2503
Episode: 2504
Episode: 2505
SOLVED! Episode 2505 Steps: 3 Epsilon 0.2988
Episode: 2506
SOLVED! Episode 2506 Steps: 3 Epsilon 0.2988
Episode: 2507
Episode: 2508
SOLVED! Episode 2508 Steps: 3 Epsilon 0.2988
Episode: 2509
Episode: 2510
Episode: 2511
Episode: 2512
SOLVED! Episode 2512 Steps: 3 Epsilon 0.2988
Episode: 2513
Episode: 2514
Episode: 2515
Episode: 2516
SOLVED! Episode 2516 Steps: 12 Epsilon 0.2988
Episode: 2517
Episode: 2518
Episode: 2519
SOLVED! Episode 2519 Steps: 1 Epsilon 0.2988
Episode: 2520
SOLVED! Episode 2520 Steps: 3 Epsilon 0.2988
Episode: 2521
Episode: 2522
Episode: 2523
Episode: 2524
Episode: 2525
Episode: 2526
Episode: 2527
Episode: 2528
Episode: 2529
SOLVED! Episode 2529 Steps: 1 Epsilon 0.2988
Episod

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 43
defaultdict(<class 'int'>, {3: 21, 1: 15, 2: 7})
******************************
Episode: 2600
Episode: 2601
Episode: 2602
Episode: 2603
Episode: 2604
Episode: 2605
Episode: 2606
SOLVED! Episode 2606 Steps: 3 Epsilon 0.2988
Episode: 2607
SOLVED! Episode 2607 Steps: 1 Epsilon 0.2988
Episode: 2608
SOLVED! Episode 2608 Steps: 3 Epsilon 0.2988
Episode: 2609
SOLVED! Episode 2609 Steps: 1 Epsilon 0.2988
Episode: 2610
SOLVED! Episode 2610 Steps: 3 Epsilon 0.2988
Episode: 2611
Episode: 2612
Episode: 2613
Episode: 2614
Episode: 2615
Episode: 2616
Episode: 2617
SOLVED! Episode 2617 Steps: 3 Epsilon 0.2988
Episode: 2618
SOLVED! Episode 2618 Steps: 18 Epsilon 0.2988
Episode: 2619
SOLVED! Episode 2619 Steps: 3 Epsilon 0.2988
Episode: 2620
Episode: 2621
Episode: 2622
SOLVED! Episode 2622 Steps: 8 Epsilon 0.2988
Episode: 2623
Episode: 2624
SOLVED! Episode 2624 Steps: 3 Epsilon 0.2988
Episode: 2625
SOLVED! Episode 2

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 45
defaultdict(<class 'int'>, {3: 20, 1: 15, 2: 10})
******************************
Episode: 2700
Episode: 2701
SOLVED! Episode 2701 Steps: 1 Epsilon 0.2988
Episode: 2702
Episode: 2703
Episode: 2704
SOLVED! Episode 2704 Steps: 2 Epsilon 0.2988
Episode: 2705
SOLVED! Episode 2705 Steps: 3 Epsilon 0.2988
Episode: 2706
Episode: 2707
Episode: 2708
Episode: 2709
Episode: 2710
Episode: 2711
Episode: 2712
SOLVED! Episode 2712 Steps: 1 Epsilon 0.2988
Episode: 2713
Episode: 2714
Episode: 2715
Episode: 2716
Episode: 2717
Episode: 2718
Episode: 2719
SOLVED! Episode 2719 Steps: 1 Epsilon 0.2988
Episode: 2720
Episode: 2721
SOLVED! Episode 2721 Steps: 2 Epsilon 0.2988
Episode: 2722
Episode: 2723
Episode: 2724
Episode: 2725
SOLVED! Episode 2725 Steps: 3 Epsilon 0.2988
Episode: 2726
Episode: 2727
SOLVED! Episode 2727 Steps: 1 Epsilon 0.2988
Episode: 2728
SOLVED! Episode 2728 Steps: 1 Epsilon 0.2988
Episode: 2729
Episod

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 46
defaultdict(<class 'int'>, {3: 21, 1: 16, 2: 9})
******************************
Episode: 2800
Episode: 2801
Episode: 2802
Episode: 2803
SOLVED! Episode 2803 Steps: 2 Epsilon 0.2988
Episode: 2804
SOLVED! Episode 2804 Steps: 1 Epsilon 0.2988
Episode: 2805
Episode: 2806
SOLVED! Episode 2806 Steps: 2 Epsilon 0.2988
Episode: 2807
SOLVED! Episode 2807 Steps: 1 Epsilon 0.2988
Episode: 2808
Episode: 2809
Episode: 2810
Episode: 2811
Episode: 2812
SOLVED! Episode 2812 Steps: 3 Epsilon 0.2988
Episode: 2813
SOLVED! Episode 2813 Steps: 1 Epsilon 0.2988
Episode: 2814
Episode: 2815
Episode: 2816
Episode: 2817
SOLVED! Episode 2817 Steps: 3 Epsilon 0.2988
Episode: 2818
Episode: 2819
Episode: 2820
Episode: 2821
Episode: 2822
Episode: 2823
Episode: 2824
Episode: 2825
SOLVED! Episode 2825 Steps: 1 Epsilon 0.2988
Episode: 2826
Episode: 2827
Episode: 2828
Episode: 2829
Episode: 2830
Episode: 2831
Episode: 2832
Episode: 2

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 45
defaultdict(<class 'int'>, {3: 21, 1: 15, 2: 9})
******************************
Episode: 2900
Episode: 2901
Episode: 2902
SOLVED! Episode 2902 Steps: 3 Epsilon 0.2988
Episode: 2903
SOLVED! Episode 2903 Steps: 3 Epsilon 0.2988
Episode: 2904
Episode: 2905
Episode: 2906
Episode: 2907
SOLVED! Episode 2907 Steps: 2 Epsilon 0.2988
Episode: 2908
Episode: 2909
SOLVED! Episode 2909 Steps: 3 Epsilon 0.2988
Episode: 2910
Episode: 2911
Episode: 2912
Episode: 2913
Episode: 2914
SOLVED! Episode 2914 Steps: 1 Epsilon 0.2988
Episode: 2915
Episode: 2916
Episode: 2917
Episode: 2918
Episode: 2919
Episode: 2920
SOLVED! Episode 2920 Steps: 3 Epsilon 0.2988
Episode: 2921
Episode: 2922
SOLVED! Episode 2922 Steps: 3 Epsilon 0.2988
Episode: 2923
SOLVED! Episode 2923 Steps: 2 Epsilon 0.2988
Episode: 2924
Episode: 2925
SOLVED! Episode 2925 Steps: 3 Epsilon 0.2988
Episode: 2926
Episode: 2927
Episode: 2928
SOLVED! Episode 2928 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 40
defaultdict(<class 'int'>, {3: 21, 1: 12, 2: 7})
******************************
Episode: 3000
SOLVED! Episode 3000 Steps: 3 Epsilon 0.2988
Episode: 3001
SOLVED! Episode 3001 Steps: 3 Epsilon 0.2988
Episode: 3002
Episode: 3003
Episode: 3004
Episode: 3005
Episode: 3006
Episode: 3007
Episode: 3008
SOLVED! Episode 3008 Steps: 3 Epsilon 0.2988
Episode: 3009
SOLVED! Episode 3009 Steps: 1 Epsilon 0.2988
Episode: 3010
Episode: 3011
Episode: 3012
SOLVED! Episode 3012 Steps: 3 Epsilon 0.2988
Episode: 3013
Episode: 3014
Episode: 3015
Episode: 3016
SOLVED! Episode 3016 Steps: 3 Epsilon 0.2988
Episode: 3017
SOLVED! Episode 3017 Steps: 2 Epsilon 0.2988
Episode: 3018
Episode: 3019
Episode: 3020
SOLVED! Episode 3020 Steps: 25 Epsilon 0.2988
Episode: 3021
Episode: 3022
Episode: 3023
Episode: 3024
Episode: 3025
Episode: 3026
Episode: 3027
Episode: 3028
SOLVED! Episode 3028 Steps: 1 Epsilon 0.2988
Episode: 3029
SOLVED

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 43
defaultdict(<class 'int'>, {3: 19, 1: 15, 2: 9})
******************************
Episode: 3100
SOLVED! Episode 3100 Steps: 1 Epsilon 0.2988
Episode: 3101
Episode: 3102
Episode: 3103
Episode: 3104
SOLVED! Episode 3104 Steps: 9 Epsilon 0.2988
Episode: 3105
SOLVED! Episode 3105 Steps: 2 Epsilon 0.2988
Episode: 3106
SOLVED! Episode 3106 Steps: 2 Epsilon 0.2988
Episode: 3107
SOLVED! Episode 3107 Steps: 3 Epsilon 0.2988
Episode: 3108
Episode: 3109
Episode: 3110
Episode: 3111
Episode: 3112
SOLVED! Episode 3112 Steps: 1 Epsilon 0.2988
Episode: 3113
Episode: 3114
SOLVED! Episode 3114 Steps: 2 Epsilon 0.2988
Episode: 3115
Episode: 3116
Episode: 3117
SOLVED! Episode 3117 Steps: 2 Epsilon 0.2988
Episode: 3118
Episode: 3119
Episode: 3120
SOLVED! Episode 3120 Steps: 3 Epsilon 0.2988
Episode: 3121
SOLVED! Episode 3121 Steps: 1 Epsilon 0.2988
Episode: 3122
Episode: 3123
Episode: 3124
Episode: 3125
SOLVED! Episode 31

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 42
defaultdict(<class 'int'>, {3: 20, 1: 15, 2: 7})
******************************
Episode: 3200
SOLVED! Episode 3200 Steps: 23 Epsilon 0.2988
Episode: 3201
Episode: 3202
SOLVED! Episode 3202 Steps: 1 Epsilon 0.2988
Episode: 3203
Episode: 3204
SOLVED! Episode 3204 Steps: 1 Epsilon 0.2988
Episode: 3205
Episode: 3206
Episode: 3207
SOLVED! Episode 3207 Steps: 3 Epsilon 0.2988
Episode: 3208
Episode: 3209
Episode: 3210
SOLVED! Episode 3210 Steps: 2 Epsilon 0.2988
Episode: 3211
Episode: 3212
Episode: 3213
SOLVED! Episode 3213 Steps: 3 Epsilon 0.2988
Episode: 3214
Episode: 3215
Episode: 3216
Episode: 3217
Episode: 3218
SOLVED! Episode 3218 Steps: 9 Epsilon 0.2988
Episode: 3219
SOLVED! Episode 3219 Steps: 1 Epsilon 0.2988
Episode: 3220
SOLVED! Episode 3220 Steps: 1 Epsilon 0.2988
Episode: 3221
Episode: 3222
Episode: 3223
SOLVED! Episode 3223 Steps: 1 Epsilon 0.2988
Episode: 3224
SOLVED! Episode 3224 Steps: 3 E

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 36
defaultdict(<class 'int'>, {3: 18, 1: 11, 2: 7})
******************************
Episode: 3300
Episode: 3301
Episode: 3302
Episode: 3303
Episode: 3304
SOLVED! Episode 3304 Steps: 3 Epsilon 0.2988
Episode: 3305
SOLVED! Episode 3305 Steps: 3 Epsilon 0.2988
Episode: 3306
SOLVED! Episode 3306 Steps: 1 Epsilon 0.2988
Episode: 3307
SOLVED! Episode 3307 Steps: 13 Epsilon 0.2988
Episode: 3308
Episode: 3309
Episode: 3310
Episode: 3311
SOLVED! Episode 3311 Steps: 1 Epsilon 0.2988
Episode: 3312
Episode: 3313
Episode: 3314
Episode: 3315
Episode: 3316
Episode: 3317
Episode: 3318
Episode: 3319
SOLVED! Episode 3319 Steps: 2 Epsilon 0.2988
Episode: 3320
Episode: 3321
Episode: 3322
Episode: 3323
Episode: 3324
Episode: 3325
Episode: 3326
SOLVED! Episode 3326 Steps: 15 Epsilon 0.2988
Episode: 3327
Episode: 3328
SOLVED! Episode 3328 Steps: 1 Epsilon 0.2988
Episode: 3329
Episode: 3330
Episode: 3331
SOLVED! Episode 3331 S

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 44
defaultdict(<class 'int'>, {3: 19, 1: 17, 2: 8})
******************************
Episode: 3400
Episode: 3401
Episode: 3402
SOLVED! Episode 3402 Steps: 1 Epsilon 0.2988
Episode: 3403
SOLVED! Episode 3403 Steps: 3 Epsilon 0.2988
Episode: 3404
Episode: 3405
SOLVED! Episode 3405 Steps: 1 Epsilon 0.2988
Episode: 3406
SOLVED! Episode 3406 Steps: 2 Epsilon 0.2988
Episode: 3407
Episode: 3408
SOLVED! Episode 3408 Steps: 3 Epsilon 0.2988
Episode: 3409
Episode: 3410
Episode: 3411
Episode: 3412
Episode: 3413
SOLVED! Episode 3413 Steps: 10 Epsilon 0.2988
Episode: 3414
SOLVED! Episode 3414 Steps: 1 Epsilon 0.2988
Episode: 3415
SOLVED! Episode 3415 Steps: 2 Epsilon 0.2988
Episode: 3416
Episode: 3417
Episode: 3418
Episode: 3419
Episode: 3420
Episode: 3421
Episode: 3422
Episode: 3423
Episode: 3424
Episode: 3425
SOLVED! Episode 3425 Steps: 2 Epsilon 0.2988
Episode: 3426
SOLVED! Episode 3426 Steps: 3 Epsilon 0.2988
Epi

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 46
defaultdict(<class 'int'>, {3: 20, 1: 17, 2: 9})
******************************
Episode: 3500
Episode: 3501
Episode: 3502
Episode: 3503
SOLVED! Episode 3503 Steps: 3 Epsilon 0.2988
Episode: 3504
Episode: 3505
SOLVED! Episode 3505 Steps: 3 Epsilon 0.2988
Episode: 3506
SOLVED! Episode 3506 Steps: 3 Epsilon 0.2988
Episode: 3507
SOLVED! Episode 3507 Steps: 2 Epsilon 0.2988
Episode: 3508
Episode: 3509
Episode: 3510
Episode: 3511
SOLVED! Episode 3511 Steps: 3 Epsilon 0.2988
Episode: 3512
Episode: 3513
SOLVED! Episode 3513 Steps: 3 Epsilon 0.2988
Episode: 3514
Episode: 3515
Episode: 3516
SOLVED! Episode 3516 Steps: 2 Epsilon 0.2988
Episode: 3517
Episode: 3518
Episode: 3519
SOLVED! Episode 3519 Steps: 3 Epsilon 0.2988
Episode: 3520
SOLVED! Episode 3520 Steps: 21 Epsilon 0.2988
Episode: 3521
Episode: 3522
SOLVED! Episode 3522 Steps: 3 Epsilon 0.2988
Episode: 3523
SOLVED! Episode 3523 Steps: 3 Epsilon 0.2988


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 44
defaultdict(<class 'int'>, {3: 22, 1: 15, 2: 7})
******************************
Episode: 3600
Episode: 3601
Episode: 3602
SOLVED! Episode 3602 Steps: 1 Epsilon 0.2988
Episode: 3603
Episode: 3604
Episode: 3605
Episode: 3606
SOLVED! Episode 3606 Steps: 2 Epsilon 0.2988
Episode: 3607
SOLVED! Episode 3607 Steps: 2 Epsilon 0.2988
Episode: 3608
Episode: 3609
Episode: 3610
SOLVED! Episode 3610 Steps: 3 Epsilon 0.2988
Episode: 3611
Episode: 3612
SOLVED! Episode 3612 Steps: 3 Epsilon 0.2988
Episode: 3613
SOLVED! Episode 3613 Steps: 3 Epsilon 0.2988
Episode: 3614
SOLVED! Episode 3614 Steps: 1 Epsilon 0.2988
Episode: 3615
Episode: 3616
Episode: 3617
SOLVED! Episode 3617 Steps: 2 Epsilon 0.2988
Episode: 3618
SOLVED! Episode 3618 Steps: 3 Epsilon 0.2988
Episode: 3619
Episode: 3620
Episode: 3621
Episode: 3622
Episode: 3623
Episode: 3624
Episode: 3625
SOLVED! Episode 3625 Steps: 15 Epsilon 0.2988
Episode: 3626
SOL