# Final Project - Reinforcement Learning
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy in Drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time
from tqdm.notebook import tqdm

import numpy as np
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque

from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense

In [3]:
%matplotlib inline

In [4]:
# Ask imageio's ffmpeg plugin to download its ffmpeg binary.
# NOTE(review): presumably this relies on the imageio==2.4.0 pin in the install
# cell; newer imageio releases may not provide download() — confirm.
imageio.plugins.ffmpeg.download()

In [5]:
# Quiet gym's logger so the training output is not flooded with warnings.
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embeds an mp4 file in the notebook.

    The file is read in full, base64-encoded and inlined as a ``data:`` URI
    inside an HTML5 ``<video>`` tag.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        The ``HTML`` display object wrapping the ``<video>`` tag.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Solution

In [7]:
class SOK_Agent:
    """DQN agent that learns on 5x5 board encodings with 8 discrete actions.

    Two networks are kept: ``model`` (trained online) and ``target_model``
    (a slowly blended copy). During replay the online model picks the best
    next action and the target model scores it (double-DQN style update).
    Every sampled transition is also replayed under three additional 90-degree
    rotations of the board, with the action remapped accordingly.
    """

    def __init__(self):
        # Construct DQN models
        self.state_size = (5,5,1)
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8

        # Replay buffers
        self.replay_buffer = deque(maxlen=10000)
        self.prioritized_replay_buffer = deque(maxlen=2000)
        self.prioritized_replay_batch = 20

        # Hyperparameters
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.5
        self.epsilon_decay = 0.99995
        self.epsilon_update_rate = 10
        self.replay_rate = 10
        self.update_beta = 0.99

        self.verbosity = 100

        # Action index -> action index after one np.rot90 step of the board
        # (used by replay() for rotation augmentation).
        # NOTE(review): presumably 0-3 and 4-7 are two groups of four
        # directional actions — confirm against the environment.
        self.action_rotation_map = {
            0: 2,
            1: 3,
            2: 1,
            3: 0,
            4: 6,
            5: 7,
            6: 5,
            7: 4
        }

    def _build_model(self):
        """Build and compile the Q-network (one conv layer plus two dense layers)."""
        model = Sequential()
        model.add(Conv2D(4, (3,3), input_shape=self.state_size, activation='relu'))
        model.add(Flatten())
        model.add(Dense(16, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='huber', optimizer="adam")
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the main replay buffer."""
        self.replay_buffer.append([state, action, reward, next_state, done])

    def copy_to_prioritized_buffer(self, n):
        """Copy the ``n`` most recent transitions into the prioritized buffer.

        ``n`` is clamped to the number of stored transitions so that asking
        for more than is available no longer raises ``IndexError``.
        """
        available = min(n, len(self.replay_buffer))
        for i in range(available):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])

    def act(self, state):
        """Pick an action epsilon-greedily.

        With probability ``epsilon`` a uniformly random action index is
        returned; otherwise the arg-max of the online model's prediction.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self):
        """Train the online model on one rotation-augmented minibatch.

        ``batch_size`` transitions are sampled from the prioritized buffer
        only, and each one is expanded into 4 rows (the original plus three
        successive 90-degree rotations). Does nothing when the prioritized
        buffer holds fewer than ``batch_size`` transitions.
        """
        if len(self.prioritized_replay_buffer) < self.batch_size:
            # Not enough data yet; random.sample would raise ValueError.
            return

        minibatch = random.sample(self.prioritized_replay_buffer, self.batch_size)

        n_rows = self.batch_size * 4
        height, width = self.state_size[0], self.state_size[1]
        states = np.zeros((n_rows, height, width))
        next_states = np.zeros((n_rows, height, width))
        actions = np.zeros(n_rows, dtype=int)
        rewards = np.zeros(n_rows)
        terminal = np.zeros(n_rows, dtype=bool)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            for rot in range(4):
                if rot != 0:
                    # Rotations accumulate: each pass turns the previous
                    # board by one more quarter turn and remaps the action.
                    state = np.rot90(state, axes=(1,2))
                    next_state = np.rot90(next_state, axes=(1,2))
                    action = self.action_rotation_map[action]

                row = i*4 + rot
                states[row] = state
                actions[row] = action
                rewards[row] = reward
                next_states[row] = next_state
                terminal[row] = bool(done)

        # The online net chooses the next action, the target net evaluates it.
        targets = self.model.predict(states, verbose=0)
        best_next = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_q = np.asarray(self.target_model.predict(next_states, verbose=0))

        rows = np.arange(n_rows)
        bootstrap = self.gamma * next_q[rows, best_next].astype(np.float64)
        # Terminal transitions get no bootstrapped future value.
        targets[rows, actions] = rewards + np.where(terminal, 0.0, bootstrap)

        self.model.fit(states, targets, epochs=1, verbose=0)

    def update_epsilon(self):
        """Multiply epsilon by the decay factor while it is above ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay

    def update_target_model(self):
        """Blend the online weights into the target model.

        Each target weight becomes
        ``update_beta * target + (1 - update_beta) * online``.
        """
        blended = [
            self.update_beta*target_w + (1-self.update_beta)*online_w
            for online_w, target_w in zip(self.model.get_weights(),
                                          self.target_model.get_weights())
        ]
        self.target_model.set_weights(blended)

    def load(self, name):
        """Load online-model weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save online-model weights to ``name``, creating its directory if needed."""
        import os

        directory = os.path.dirname(name)
        if directory:
            os.makedirs(directory, exist_ok=True)
        self.model.save_weights(name)

In [8]:
def process_frame(frame):
    """Reduce a rendered frame to a normalized 5x5 grid with a leading axis.

    Channel 0 of rows/columns 16..95 is split into 5x5 tiles of 16x16 pixels.
    Each tile is represented by its maximum pixel value, divided by 255.

    Args:
        frame: Image array indexable as ``frame[row, col, channel]``.
            # assumes at least 96x96 pixels — TODO confirm against the renderer

    Returns:
        Array of shape ``(1, 5, 5)``.
    """
    cropped = frame[16:96, 16:96, 0]
    tiles = cropped.reshape(5, 16, 5, 16)
    tile_peaks = tiles.max(axis=(1, 3))
    normalized = tile_peaks / 255
    return normalized[np.newaxis, ...]

In [9]:
def get_distances(room_state):
    """Breadth-first step distances from the target cell to every reachable cell.

    Cells with value 0 are impassable; every other value can be walked
    through. If several cells hold the value 2, the last one in row-major
    order is used as the target (same as the original scan).

    Args:
        room_state: 2-D array of cell codes; must contain at least one 2.

    Returns:
        Float array with the same shape as ``room_state``. The target,
        impassable cells and unreachable cells all hold 0.

    Raises:
        ValueError: If no cell equals 2.
    """
    target_cells = np.argwhere(room_state == 2)
    if len(target_cells) == 0:
        raise ValueError("room_state contains no target cell (value 2)")
    target = tuple(int(c) for c in target_cells[-1])

    n_rows, n_cols = room_state.shape
    distances = np.zeros(shape=room_state.shape)
    visited_cells = {target}
    cell_queue = deque([target])

    while cell_queue:
        row, col = cell_queue.popleft()
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            neighbour = (row + d_row, col + d_col)
            # Stay inside the grid: rooms without a wall border must not
            # raise IndexError or wrap around through negative indices.
            if not (0 <= neighbour[0] < n_rows and 0 <= neighbour[1] < n_cols):
                continue
            if room_state[neighbour] == 0 or neighbour in visited_cells:
                continue
            distances[neighbour] = distances[row, col] + 1
            visited_cells.add(neighbour)
            cell_queue.append(neighbour)

    return distances

def calc_distances(room_state, distances):
    """Locate the box (code 4) and the mover (code 5) in the room.

    Args:
        room_state: 2-D array of cell codes.
        distances: 2-D table indexed like ``room_state`` (e.g. the output of
            ``get_distances``).

    Returns:
        ``(mover, box, distances[box])`` where ``mover`` and ``box`` are
        ``(row, col)`` tuples. If a code occurs more than once, the last
        occurrence in row-major order wins.
    """
    mover = None
    box = None
    for i, row in enumerate(room_state):
        for j, cell in enumerate(row):
            if cell == 4:
                box = (i, j)

            if cell == 5:
                mover = (i, j)

    box_row, box_col = box[0], box[1]
    return mover, box, distances[box_row][box_col]

def box2target_change_reward(room_state, next_room_state, distances):
    """Shaping reward for one step, from box and mover movement.

    Two terms are summed:
      * +1.0 / -1.0 when the box's table distance (``distances`` at the box
        cell) decreases / increases between the two states.
      * +0.25 when the mover gets closer (Euclidean) to the box and was at
        least 2 away before; -0.25 when it gets farther and ends at least
        2 away.

    Args:
        room_state: Room before the step.
        next_room_state: Room after the step.
        distances: Distance table passed through to ``calc_distances``.

    Returns:
        The summed reward as a float.
    """
    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)

    # Box-to-target term.
    if n_t2b < t2b:
        box_term = 1.0
    elif n_t2b > t2b:
        box_term = -1.0
    else:
        box_term = 0.0

    # Mover-to-box term (straight-line distance before and after).
    m2b = np.sqrt((mover[0]-box[0])**2 + (mover[1]-box[1])**2)
    n_m2b = np.sqrt((n_mover[0]-n_box[0])**2 + (n_mover[1]-n_box[1])**2)

    if n_m2b < m2b and m2b >= 2:
        mover_term = 0.25
    elif n_m2b > m2b and n_m2b >= 2:
        mover_term = -0.25
    else:
        mover_term = 0.0

    return 0.0 + box_term + mover_term

## Training

In [10]:
def test_agent(agent):
    current_epsilon = agent.epsilon
    agent.epsilon = 0.0
    
    tests = [20, 37, 12, 35, 0, 21, 28, 59, 9, 19, 13, 18, 5, 32, 26, 38, 2, 49, 65, 24]
    test_steps = []
    
    for t in tqdm(tests):
        steps = 0
        random.seed(t)
        sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
        sok.set_maxsteps(500)        
        state = sok.get_image('rgb_array')
        done = False
        while not done:
            steps += 1
            action = agent.act(process_frame(state))
        
            if action < 4:
                action += 1
            else:
                action += 5
        
            state, reward, done, info = sok.step(action)
        test_steps.append(steps)
        steps = 0
        
    agent.epsilon = current_epsilon
    
    return test_steps        

In [11]:
# Upper bound on the number of training episodes.
max_episodes = 10000
# Step limit handed to every training environment.
max_steps = 500

def init_sok(random_seed):
    """Create a 7x7, single-box push-and-pull Sokoban environment.

    Python's ``random`` module is seeded with ``random_seed`` right before
    the environment is constructed, and the environment's step limit is set
    to ``max_steps``.

    Returns:
        The configured ``PushAndPullSokobanEnv`` instance.
    """
    random.seed(random_seed)
    env = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    env.set_maxsteps(max_steps)
    return env

In [12]:
# Training driver: plays up to max_episodes seeded rooms, stores every
# transition, and starts gradient updates once enough episodes were solved.
agent = SOK_Agent()
# Number of solved episodes required before replay()/training starts.
successes_before_train = 20
successful_episodes = 0
# Training stops after this many solved episodes in a row.
continuous_successes_goal = 100
continuous_successes = 0

# One list of per-room step counts per evaluation round.
tests_performance = []
# Length (in steps) of every finished training episode.
steps_per_episode = []

for e in range(max_episodes):
    # Evaluate the greedy policy every 200 episodes (including before episode 1).
    if e % 200 == 0:
        print("Testing Agent")
        test_steps = test_agent(agent)
        tests_performance.append(test_steps)
        print("Test Results: %s" % test_steps)
    
    if continuous_successes >= continuous_successes_goal:
        print("Agent training finished!")
        break
    
    print("Episode: %d" % (e+1))
    
    # Training seeds start at 100, so they do not overlap the seeds used in test_agent.
    sok = init_sok(e+100)
    state = process_frame(sok.get_image('rgb_array'))    
    room_state = sok.room_state.copy() 
    # Distance table is computed once per episode from the initial room.
    distances = get_distances(room_state)
    for step in range(sok.max_steps):        
        action = agent.act(state)
        # Agent actions 0-3 map to env actions 1-4, and 4-7 to env actions 9-12.
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1) 
        else:
            next_state, reward, done, _ = sok.step(action+5)         
        
        next_state = process_frame(next_state)        
        # Reference to the env's current room; copied below before the next step.
        next_room_state = sok.room_state
 
        # Shaping reward is only added on non-terminal steps.
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)
            
        agent.remember(state, action, reward, next_state, done)
        
        state = next_state.copy() 
        room_state = next_room_state.copy()
        
        # After the warm-up, train / soft-update / decay epsilon every replay_rate steps.
        if successful_episodes >= successes_before_train:
            if (step+1) % agent.replay_rate == 0:
                agent.replay() 
                agent.update_target_model()
                agent.update_epsilon()                
        
        if done:            
            # A cell with value 3 in the final room is treated as "solved"
            # (presumably box-on-target — confirm against the env's encoding).
            if 3 in sok.room_state:
                print("SOLVED! Episode %d Steps: %d Epsilon %.4f" % (e+1, step+1, agent.epsilon))
                successful_episodes += 1
                continuous_successes += 1                 
                # Keep the last transitions of the solved episode for prioritized replay.
                agent.copy_to_prioritized_buffer(min(agent.prioritized_replay_batch, step+1))
            else:                
                continuous_successes = 0
                
            steps_per_episode.append(step+1)
            # Weights are written after every finished episode.
            agent.save("saved_models/exp1_episode%d.h5" % (e+1))
            
            break

Testing Agent


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 1, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 1
SOLVED! Episode 1 Steps: 258 Epsilon 1.0000
Episode: 2
SOLVED! Episode 2 Steps: 106 Epsilon 1.0000
Episode: 3
SOLVED! Episode 3 Steps: 409 Epsilon 1.0000
Episode: 4
Episode: 5
Episode: 6
SOLVED! Episode 6 Steps: 472 Epsilon 1.0000
Episode: 7
Episode: 8
SOLVED! Episode 8 Steps: 46 Epsilon 1.0000
Episode: 9
SOLVED! Episode 9 Steps: 26 Epsilon 1.0000
Episode: 10
Episode: 11
SOLVED! Episode 11 Steps: 9 Epsilon 1.0000
Episode: 12
SOLVED! Episode 12 Steps: 261 Epsilon 1.0000
Episode: 13
Episode: 14
Episode: 15
SOLVED! Episode 15 Steps: 1 Epsilon 1.0000
Episode: 16
Episode: 17
Episode: 18
Episode: 19
SOLVED! Episode 19 Steps: 89 Epsilon 1.0000
Episode: 20
SOLVED! Episode 20 Steps: 10 Epsilon 1.0000
Episode: 21
SOLVED! Episode 21 Steps: 470 Epsilon 1.0000
Episode: 22
SOLVED! Episode 22 Steps: 4 Epsilon 1.0000
Episode: 23
Episode: 24
Episode: 25
Episode: 26
SOLVED! Episod

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 1, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 201
Episode: 202
Episode: 203
Episode: 204
SOLVED! Episode 204 Steps: 233 Epsilon 0.7778
Episode: 205
SOLVED! Episode 205 Steps: 63 Epsilon 0.7776
Episode: 206
Episode: 207
Episode: 208
SOLVED! Episode 208 Steps: 9 Epsilon 0.7737
Episode: 209
Episode: 210
SOLVED! Episode 210 Steps: 53 Epsilon 0.7716
Episode: 211
Episode: 212
SOLVED! Episode 212 Steps: 8 Epsilon 0.7697
Episode: 213
SOLVED! Episode 213 Steps: 55 Epsilon 0.7695
Episode: 214
SOLVED! Episode 214 Steps: 218 Epsilon 0.7687
Episode: 215
SOLVED! Episode 215 Steps: 42 Epsilon 0.7685
Episode: 216
Episode: 217
SOLVED! Episode 217 Steps: 500 Epsilon 0.7647
Episode: 218
SOLVED! Episode 218 Steps: 25 Epsilon 0.7646
Episode: 219
SOLVED! Episode 219 Steps: 98 Epsilon 0.7643
Episode: 220
SOLVED! Episode 220 Steps: 12 Epsilon 0.7642
Episode: 221
Episode: 222
SOLVED! Episode 222 Steps: 37 Epsilon 0.7622
Episode: 223
E

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 1, 500, 500, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 401
SOLVED! Episode 401 Steps: 4 Epsilon 0.5713
Episode: 402
Episode: 403
Episode: 404
SOLVED! Episode 404 Steps: 13 Epsilon 0.5684
Episode: 405
SOLVED! Episode 405 Steps: 9 Epsilon 0.5684
Episode: 406
SOLVED! Episode 406 Steps: 9 Epsilon 0.5684
Episode: 407
Episode: 408
Episode: 409
SOLVED! Episode 409 Steps: 26 Epsilon 0.5655
Episode: 410
Episode: 411
SOLVED! Episode 411 Steps: 36 Epsilon 0.5640
Episode: 412
SOLVED! Episode 412 Steps: 289 Epsilon 0.5632
Episode: 413
Episode: 414
Episode: 415
SOLVED! Episode 415 Steps: 46 Epsilon 0.5603
Episode: 416
Episode: 417
SOLVED! Episode 417 Steps: 17 Epsilon 0.5589
Episode: 418
Episode: 419
Episode: 420
Episode: 421
Episode: 422
SOLVED! Episode 422 Steps: 198 Epsilon 0.5528
Episode: 423
Episode: 424
SOLVED! Episode 424 Steps: 39 Epsilon 0.5513
Episode: 425
Episode: 426
Episode: 427
Episode: 428
Episode: 429
SOLVED! Episode 4

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 2, 500, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 601
SOLVED! Episode 601 Steps: 7 Epsilon 0.5000
Episode: 602
Episode: 603
Episode: 604
SOLVED! Episode 604 Steps: 50 Epsilon 0.5000
Episode: 605
SOLVED! Episode 605 Steps: 40 Epsilon 0.5000
Episode: 606
Episode: 607
SOLVED! Episode 607 Steps: 2 Epsilon 0.5000
Episode: 608
SOLVED! Episode 608 Steps: 11 Epsilon 0.5000
Episode: 609
SOLVED! Episode 609 Steps: 30 Epsilon 0.5000
Episode: 610
SOLVED! Episode 610 Steps: 33 Epsilon 0.5000
Episode: 611
Episode: 612
SOLVED! Episode 612 Steps: 1 Epsilon 0.5000
Episode: 613
Episode: 614
SOLVED! Episode 614 Steps: 1 Epsilon 0.5000
Episode: 615
Episode: 616
SOLVED! Episode 616 Steps: 2 Epsilon 0.5000
Episode: 617
Episode: 618
Episode: 619
Episode: 620
Episode: 621
SOLVED! Episode 621 Steps: 131 Epsilon 0.5000
Episode: 622
SOLVED! Episode 622 Steps: 115 Epsilon 0.5000
Episode: 623
Episode: 624
Episode: 625
SOLVED! Episode 625 Steps:

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 801
SOLVED! Episode 801 Steps: 2 Epsilon 0.5000
Episode: 802
SOLVED! Episode 802 Steps: 22 Epsilon 0.5000
Episode: 803
Episode: 804
SOLVED! Episode 804 Steps: 80 Epsilon 0.5000
Episode: 805
Episode: 806
Episode: 807
SOLVED! Episode 807 Steps: 3 Epsilon 0.5000
Episode: 808
SOLVED! Episode 808 Steps: 48 Epsilon 0.5000
Episode: 809
Episode: 810
Episode: 811
SOLVED! Episode 811 Steps: 40 Epsilon 0.5000
Episode: 812
Episode: 813
Episode: 814
SOLVED! Episode 814 Steps: 7 Epsilon 0.5000
Episode: 815
Episode: 816
Episode: 817
Episode: 818
Episode: 819
Episode: 820
Episode: 821
Episode: 822
SOLVED! Episode 822 Steps: 197 Epsilon 0.5000
Episode: 823
Episode: 824
SOLVED! Episode 824 Steps: 244 Epsilon 0.5000
Episode: 825
Episode: 826
Episode: 827
Episode: 828
SOLVED! Episode 828 Steps: 22 Epsilon 0.5000
Episode: 829
SOLVED! Episode 829 Steps: 5 Epsilon 0.5000
Episode: 830
E

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 1001
Episode: 1002
SOLVED! Episode 1002 Steps: 4 Epsilon 0.5000
Episode: 1003
Episode: 1004
SOLVED! Episode 1004 Steps: 417 Epsilon 0.5000
Episode: 1005
Episode: 1006
SOLVED! Episode 1006 Steps: 303 Epsilon 0.5000
Episode: 1007
Episode: 1008
Episode: 1009
SOLVED! Episode 1009 Steps: 46 Epsilon 0.5000
Episode: 1010
SOLVED! Episode 1010 Steps: 141 Epsilon 0.5000
Episode: 1011
SOLVED! Episode 1011 Steps: 2 Epsilon 0.5000
Episode: 1012
Episode: 1013
Episode: 1014
SOLVED! Episode 1014 Steps: 3 Epsilon 0.5000
Episode: 1015
SOLVED! Episode 1015 Steps: 159 Epsilon 0.5000
Episode: 1016
Episode: 1017
SOLVED! Episode 1017 Steps: 3 Epsilon 0.5000
Episode: 1018
SOLVED! Episode 1018 Steps: 2 Epsilon 0.5000
Episode: 1019
Episode: 1020
Episode: 1021
Episode: 1022
Episode: 1023
Episode: 1024
Episode: 1025
SOLVED! Episode 1025 Steps: 39 Epsilon 0.5000
Episode: 1026
Episode: 1027
E

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 1, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 1201
Episode: 1202
Episode: 1203
Episode: 1204
Episode: 1205
Episode: 1206
SOLVED! Episode 1206 Steps: 58 Epsilon 0.5000
Episode: 1207
SOLVED! Episode 1207 Steps: 8 Epsilon 0.5000
Episode: 1208
Episode: 1209
Episode: 1210
Episode: 1211
SOLVED! Episode 1211 Steps: 3 Epsilon 0.5000
Episode: 1212
SOLVED! Episode 1212 Steps: 5 Epsilon 0.5000
Episode: 1213
SOLVED! Episode 1213 Steps: 66 Epsilon 0.5000
Episode: 1214
Episode: 1215
SOLVED! Episode 1215 Steps: 110 Epsilon 0.5000
Episode: 1216
SOLVED! Episode 1216 Steps: 7 Epsilon 0.5000
Episode: 1217
Episode: 1218
Episode: 1219
SOLVED! Episode 1219 Steps: 60 Epsilon 0.5000
Episode: 1220
Episode: 1221
Episode: 1222
SOLVED! Episode 1222 Steps: 2 Epsilon 0.5000
Episode: 1223
SOLVED! Episode 1223 Steps: 125 Epsilon 0.5000
Episode: 1224
Episode: 1225
Episode: 1226
SOLVED! Episode 1226 Steps: 1 Epsilon 0.5000
Episode: 1227
Episod

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 1, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 1401
SOLVED! Episode 1401 Steps: 1 Epsilon 0.5000
Episode: 1402
Episode: 1403
Episode: 1404
SOLVED! Episode 1404 Steps: 2 Epsilon 0.5000
Episode: 1405
Episode: 1406
Episode: 1407
SOLVED! Episode 1407 Steps: 45 Epsilon 0.5000
Episode: 1408
Episode: 1409
Episode: 1410
Episode: 1411
SOLVED! Episode 1411 Steps: 190 Epsilon 0.5000
Episode: 1412
Episode: 1413
SOLVED! Episode 1413 Steps: 1 Epsilon 0.5000
Episode: 1414
Episode: 1415
SOLVED! Episode 1415 Steps: 2 Epsilon 0.5000
Episode: 1416
SOLVED! Episode 1416 Steps: 2 Epsilon 0.5000
Episode: 1417
Episode: 1418
SOLVED! Episode 1418 Steps: 94 Epsilon 0.5000
Episode: 1419
Episode: 1420
SOLVED! Episode 1420 Steps: 191 Epsilon 0.5000
Episode: 1421
Episode: 1422
SOLVED! Episode 1422 Steps: 2 Epsilon 0.5000
Episode: 1423
SOLVED! Episode 1423 Steps: 4 Epsilon 0.5000
Episode: 1424
SOLVED! Episode 1424 Steps: 5 Epsilon 0.5000
Epis

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 1601
Episode: 1602
SOLVED! Episode 1602 Steps: 20 Epsilon 0.5000
Episode: 1603
SOLVED! Episode 1603 Steps: 5 Epsilon 0.5000
Episode: 1604
SOLVED! Episode 1604 Steps: 7 Epsilon 0.5000
Episode: 1605
SOLVED! Episode 1605 Steps: 188 Epsilon 0.5000
Episode: 1606
SOLVED! Episode 1606 Steps: 1 Epsilon 0.5000
Episode: 1607
Episode: 1608
SOLVED! Episode 1608 Steps: 82 Epsilon 0.5000
Episode: 1609
Episode: 1610
SOLVED! Episode 1610 Steps: 416 Epsilon 0.5000
Episode: 1611
Episode: 1612
Episode: 1613
SOLVED! Episode 1613 Steps: 91 Epsilon 0.5000
Episode: 1614
Episode: 1615
SOLVED! Episode 1615 Steps: 15 Epsilon 0.5000
Episode: 1616
Episode: 1617
SOLVED! Episode 1617 Steps: 77 Epsilon 0.5000
Episode: 1618
SOLVED! Episode 1618 Steps: 253 Epsilon 0.5000
Episode: 1619
SOLVED! Episode 1619 Steps: 25 Epsilon 0.5000
Episode: 1620
Episode: 1621
Episode: 1622
SOLVED! Episode 1622 Steps

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 1801
Episode: 1802
Episode: 1803
SOLVED! Episode 1803 Steps: 94 Epsilon 0.5000
Episode: 1804
Episode: 1805
Episode: 1806
Episode: 1807
Episode: 1808
SOLVED! Episode 1808 Steps: 2 Epsilon 0.5000
Episode: 1809
SOLVED! Episode 1809 Steps: 173 Epsilon 0.5000
Episode: 1810
Episode: 1811
SOLVED! Episode 1811 Steps: 79 Epsilon 0.5000
Episode: 1812
Episode: 1813
SOLVED! Episode 1813 Steps: 97 Epsilon 0.5000
Episode: 1814
SOLVED! Episode 1814 Steps: 4 Epsilon 0.5000
Episode: 1815
Episode: 1816
SOLVED! Episode 1816 Steps: 462 Epsilon 0.5000
Episode: 1817
SOLVED! Episode 1817 Steps: 1 Epsilon 0.5000
Episode: 1818
SOLVED! Episode 1818 Steps: 94 Epsilon 0.5000
Episode: 1819
Episode: 1820
SOLVED! Episode 1820 Steps: 2 Epsilon 0.5000
Episode: 1821
SOLVED! Episode 1821 Steps: 4 Epsilon 0.5000
Episode: 1822
SOLVED! Episode 1822 Steps: 310 Epsilon 0.5000
Episode: 1823
Episode: 182

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 2001
Episode: 2002
SOLVED! Episode 2002 Steps: 463 Epsilon 0.5000
Episode: 2003
Episode: 2004
Episode: 2005
SOLVED! Episode 2005 Steps: 14 Epsilon 0.5000
Episode: 2006
Episode: 2007
SOLVED! Episode 2007 Steps: 354 Epsilon 0.5000
Episode: 2008
Episode: 2009
Episode: 2010
Episode: 2011
SOLVED! Episode 2011 Steps: 305 Epsilon 0.5000
Episode: 2012
SOLVED! Episode 2012 Steps: 32 Epsilon 0.5000
Episode: 2013
Episode: 2014
SOLVED! Episode 2014 Steps: 2 Epsilon 0.5000
Episode: 2015
SOLVED! Episode 2015 Steps: 3 Epsilon 0.5000
Episode: 2016
Episode: 2017
Episode: 2018
SOLVED! Episode 2018 Steps: 7 Epsilon 0.5000
Episode: 2019
Episode: 2020
Episode: 2021
Episode: 2022
Episode: 2023
Episode: 2024
SOLVED! Episode 2024 Steps: 210 Epsilon 0.5000
Episode: 2025
Episode: 2026
Episode: 2027
Episode: 2028
Episode: 2029
Episode: 2030
Episode: 2031
Episode: 2032
SOLVED! Episode 2032 

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [1, 500, 2, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 2201
SOLVED! Episode 2201 Steps: 11 Epsilon 0.5000
Episode: 2202
SOLVED! Episode 2202 Steps: 149 Epsilon 0.5000
Episode: 2203
Episode: 2204
Episode: 2205
Episode: 2206
Episode: 2207
Episode: 2208
SOLVED! Episode 2208 Steps: 4 Epsilon 0.5000
Episode: 2209
Episode: 2210
Episode: 2211
SOLVED! Episode 2211 Steps: 4 Epsilon 0.5000
Episode: 2212
Episode: 2213
Episode: 2214
SOLVED! Episode 2214 Steps: 50 Epsilon 0.5000
Episode: 2215
Episode: 2216
SOLVED! Episode 2216 Steps: 321 Epsilon 0.5000
Episode: 2217
Episode: 2218
SOLVED! Episode 2218 Steps: 6 Epsilon 0.5000
Episode: 2219
SOLVED! Episode 2219 Steps: 2 Epsilon 0.5000
Episode: 2220
Episode: 2221
Episode: 2222
SOLVED! Episode 2222 Steps: 59 Epsilon 0.5000
Episode: 2223
Episode: 2224
Episode: 2225
SOLVED! Episode 2225 Steps: 42 Epsilon 0.5000
Episode: 2226
Episode: 2227
SOLVED! Episode 2227 Steps: 48 Epsilon 0.5000
Episod

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 2, 500, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 2401
SOLVED! Episode 2401 Steps: 3 Epsilon 0.5000
Episode: 2402
Episode: 2403
Episode: 2404
Episode: 2405
Episode: 2406
Episode: 2407
Episode: 2408
SOLVED! Episode 2408 Steps: 176 Epsilon 0.5000
Episode: 2409
Episode: 2410
Episode: 2411
SOLVED! Episode 2411 Steps: 1 Epsilon 0.5000
Episode: 2412
SOLVED! Episode 2412 Steps: 12 Epsilon 0.5000
Episode: 2413
SOLVED! Episode 2413 Steps: 16 Epsilon 0.5000
Episode: 2414
Episode: 2415
SOLVED! Episode 2415 Steps: 249 Epsilon 0.5000
Episode: 2416
SOLVED! Episode 2416 Steps: 6 Epsilon 0.5000
Episode: 2417
Episode: 2418
SOLVED! Episode 2418 Steps: 2 Epsilon 0.5000
Episode: 2419
SOLVED! Episode 2419 Steps: 75 Epsilon 0.5000
Episode: 2420
Episode: 2421
Episode: 2422
SOLVED! Episode 2422 Steps: 8 Epsilon 0.5000
Episode: 2423
Episode: 2424
SOLVED! Episode 2424 Steps: 49 Epsilon 0.5000
Episode: 2425
Episode: 2426
SOLVED! Episode 2426 

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 2, 500, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 2601
Episode: 2602
Episode: 2603
Episode: 2604
SOLVED! Episode 2604 Steps: 310 Epsilon 0.5000
Episode: 2605
SOLVED! Episode 2605 Steps: 365 Epsilon 0.5000
Episode: 2606
Episode: 2607
SOLVED! Episode 2607 Steps: 5 Epsilon 0.5000
Episode: 2608
SOLVED! Episode 2608 Steps: 1 Epsilon 0.5000
Episode: 2609
SOLVED! Episode 2609 Steps: 15 Epsilon 0.5000
Episode: 2610
SOLVED! Episode 2610 Steps: 1 Epsilon 0.5000
Episode: 2611
Episode: 2612
Episode: 2613
Episode: 2614
SOLVED! Episode 2614 Steps: 139 Epsilon 0.5000
Episode: 2615
Episode: 2616
Episode: 2617
SOLVED! Episode 2617 Steps: 111 Epsilon 0.5000
Episode: 2618
Episode: 2619
SOLVED! Episode 2619 Steps: 1 Epsilon 0.5000
Episode: 2620
SOLVED! Episode 2620 Steps: 3 Epsilon 0.5000
Episode: 2621
Episode: 2622
SOLVED! Episode 2622 Steps: 32 Epsilon 0.5000
Episode: 2623
SOLVED! Episode 2623 Steps: 148 Epsilon 0.5000
Episode: 2624


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 2801
Episode: 2802
Episode: 2803
Episode: 2804
SOLVED! Episode 2804 Steps: 30 Epsilon 0.5000
Episode: 2805
SOLVED! Episode 2805 Steps: 1 Epsilon 0.5000
Episode: 2806
Episode: 2807
SOLVED! Episode 2807 Steps: 2 Epsilon 0.5000
Episode: 2808
SOLVED! Episode 2808 Steps: 18 Epsilon 0.5000
Episode: 2809
Episode: 2810
Episode: 2811
SOLVED! Episode 2811 Steps: 83 Epsilon 0.5000
Episode: 2812
SOLVED! Episode 2812 Steps: 125 Epsilon 0.5000
Episode: 2813
Episode: 2814
Episode: 2815
SOLVED! Episode 2815 Steps: 214 Epsilon 0.5000
Episode: 2816
Episode: 2817
Episode: 2818
SOLVED! Episode 2818 Steps: 3 Epsilon 0.5000
Episode: 2819
Episode: 2820
SOLVED! Episode 2820 Steps: 228 Epsilon 0.5000
Episode: 2821
Episode: 2822
Episode: 2823
Episode: 2824
Episode: 2825
Episode: 2826
SOLVED! Episode 2826 Steps: 105 Epsilon 0.5000
Episode: 2827
Episode: 2828
SOLVED! Episode 2828 Steps: 195

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 2, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 3001
Episode: 3002
Episode: 3003
Episode: 3004
Episode: 3005
Episode: 3006
Episode: 3007
Episode: 3008
Episode: 3009
SOLVED! Episode 3009 Steps: 4 Epsilon 0.5000
Episode: 3010
SOLVED! Episode 3010 Steps: 1 Epsilon 0.5000
Episode: 3011
Episode: 3012
SOLVED! Episode 3012 Steps: 254 Epsilon 0.5000
Episode: 3013
SOLVED! Episode 3013 Steps: 6 Epsilon 0.5000
Episode: 3014
Episode: 3015
Episode: 3016
Episode: 3017
SOLVED! Episode 3017 Steps: 4 Epsilon 0.5000
Episode: 3018
SOLVED! Episode 3018 Steps: 2 Epsilon 0.5000
Episode: 3019
Episode: 3020
Episode: 3021
SOLVED! Episode 3021 Steps: 151 Epsilon 0.5000
Episode: 3022
Episode: 3023
SOLVED! Episode 3023 Steps: 78 Epsilon 0.5000
Episode: 3024
Episode: 3025
Episode: 3026
Episode: 3027
Episode: 3028
SOLVED! Episode 3028 Steps: 1 Epsilon 0.5000
Episode: 3029
SOLVED! Episode 3029 Steps: 29 Epsilon 0.5000
Episode: 3030
SOLVED! Ep

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 3201
SOLVED! Episode 3201 Steps: 1 Epsilon 0.5000
Episode: 3202
Episode: 3203
SOLVED! Episode 3203 Steps: 97 Epsilon 0.5000
Episode: 3204
Episode: 3205
SOLVED! Episode 3205 Steps: 58 Epsilon 0.5000
Episode: 3206
Episode: 3207
Episode: 3208
SOLVED! Episode 3208 Steps: 53 Epsilon 0.5000
Episode: 3209
Episode: 3210
Episode: 3211
SOLVED! Episode 3211 Steps: 31 Epsilon 0.5000
Episode: 3212
Episode: 3213
Episode: 3214
Episode: 3215
Episode: 3216
Episode: 3217
Episode: 3218
Episode: 3219
SOLVED! Episode 3219 Steps: 10 Epsilon 0.5000
Episode: 3220
SOLVED! Episode 3220 Steps: 66 Epsilon 0.5000
Episode: 3221
SOLVED! Episode 3221 Steps: 1 Epsilon 0.5000
Episode: 3222
Episode: 3223
SOLVED! Episode 3223 Steps: 75 Epsilon 0.5000
Episode: 3224
Episode: 3225
Episode: 3226
Episode: 3227
SOLVED! Episode 3227 Steps: 6 Epsilon 0.5000
Episode: 3228
Episode: 3229
SOLVED! Episode 3229 St

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [1, 500, 2, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 3401
Episode: 3402
SOLVED! Episode 3402 Steps: 138 Epsilon 0.5000
Episode: 3403
SOLVED! Episode 3403 Steps: 1 Epsilon 0.5000
Episode: 3404
Episode: 3405
SOLVED! Episode 3405 Steps: 2 Epsilon 0.5000
Episode: 3406
SOLVED! Episode 3406 Steps: 6 Epsilon 0.5000
Episode: 3407
SOLVED! Episode 3407 Steps: 46 Epsilon 0.5000
Episode: 3408
SOLVED! Episode 3408 Steps: 16 Epsilon 0.5000
Episode: 3409
Episode: 3410
Episode: 3411
SOLVED! Episode 3411 Steps: 2 Epsilon 0.5000
Episode: 3412
Episode: 3413
Episode: 3414
Episode: 3415
SOLVED! Episode 3415 Steps: 117 Epsilon 0.5000
Episode: 3416
SOLVED! Episode 3416 Steps: 2 Epsilon 0.5000
Episode: 3417
Episode: 3418
Episode: 3419
Episode: 3420
Episode: 3421
Episode: 3422
Episode: 3423
Episode: 3424
Episode: 3425
Episode: 3426
SOLVED! Episode 3426 Steps: 12 Epsilon 0.5000
Episode: 3427
SOLVED! Episode 3427 Steps: 3 Epsilon 0.5000
Episode:

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 2, 2, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 3601
Episode: 3602
Episode: 3603
SOLVED! Episode 3603 Steps: 5 Epsilon 0.5000
Episode: 3604
Episode: 3605
Episode: 3606
Episode: 3607
SOLVED! Episode 3607 Steps: 134 Epsilon 0.5000
Episode: 3608
SOLVED! Episode 3608 Steps: 2 Epsilon 0.5000
Episode: 3609
Episode: 3610
Episode: 3611
Episode: 3612
SOLVED! Episode 3612 Steps: 34 Epsilon 0.5000
Episode: 3613
Episode: 3614
SOLVED! Episode 3614 Steps: 29 Epsilon 0.5000
Episode: 3615
SOLVED! Episode 3615 Steps: 33 Epsilon 0.5000
Episode: 3616
Episode: 3617
Episode: 3618
SOLVED! Episode 3618 Steps: 28 Epsilon 0.5000
Episode: 3619
SOLVED! Episode 3619 Steps: 146 Epsilon 0.5000
Episode: 3620
Episode: 3621
SOLVED! Episode 3621 Steps: 1 Epsilon 0.5000
Episode: 3622
SOLVED! Episode 3622 Steps: 395 Epsilon 0.5000
Episode: 3623
Episode: 3624
SOLVED! Episode 3624 Steps: 316 Epsilon 0.5000
Episode: 3625
SOLVED! Episode 3625 Steps: 110

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 500, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 3801
Episode: 3802
SOLVED! Episode 3802 Steps: 486 Epsilon 0.5000
Episode: 3803
SOLVED! Episode 3803 Steps: 397 Epsilon 0.5000
Episode: 3804
SOLVED! Episode 3804 Steps: 29 Epsilon 0.5000
Episode: 3805
Episode: 3806
SOLVED! Episode 3806 Steps: 75 Epsilon 0.5000
Episode: 3807
Episode: 3808
Episode: 3809
SOLVED! Episode 3809 Steps: 28 Epsilon 0.5000
Episode: 3810
Episode: 3811
SOLVED! Episode 3811 Steps: 3 Epsilon 0.5000
Episode: 3812
Episode: 3813
Episode: 3814
SOLVED! Episode 3814 Steps: 61 Epsilon 0.5000
Episode: 3815
Episode: 3816
SOLVED! Episode 3816 Steps: 6 Epsilon 0.5000
Episode: 3817
Episode: 3818
SOLVED! Episode 3818 Steps: 173 Epsilon 0.5000
Episode: 3819
Episode: 3820
Episode: 3821
SOLVED! Episode 3821 Steps: 32 Epsilon 0.5000
Episode: 3822
SOLVED! Episode 3822 Steps: 25 Epsilon 0.5000
Episode: 3823
Episode: 3824
Episode: 3825
SOLVED! Episode 3825 Steps: 2

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Test Results: [500, 500, 2, 500, 3, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500]
Episode: 4001
Episode: 4002
Episode: 4003
SOLVED! Episode 4003 Steps: 7 Epsilon 0.5000
Episode: 4004
SOLVED! Episode 4004 Steps: 2 Epsilon 0.5000
Episode: 4005
SOLVED! Episode 4005 Steps: 1 Epsilon 0.5000
Episode: 4006
SOLVED! Episode 4006 Steps: 6 Epsilon 0.5000
Episode: 4007
SOLVED! Episode 4007 Steps: 1 Epsilon 0.5000
Episode: 4008
Episode: 4009
SOLVED! Episode 4009 Steps: 1 Epsilon 0.5000
Episode: 4010
SOLVED! Episode 4010 Steps: 35 Epsilon 0.5000
Episode: 4011
SOLVED! Episode 4011 Steps: 123 Epsilon 0.5000
Episode: 4012
SOLVED! Episode 4012 Steps: 213 Epsilon 0.5000
Episode: 4013
SOLVED! Episode 4013 Steps: 14 Epsilon 0.5000
Episode: 4014
SOLVED! Episode 4014 Steps: 181 Epsilon 0.5000
Episode: 4015
SOLVED! Episode 4015 Steps: 334 Epsilon 0.5000
Episode: 4016
SOLVED! Episode 4016 Steps: 113 Epsilon 0.5000
Episode: 4017
Episode: 4018
Episode: 4019
Episode: 4020
Episode: 40

KeyboardInterrupt: 