# Final Project - Reinforcements Learning 
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time

import numpy as np
import scipy as scp
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

from tqdm.notebook import tqdm
from collections import defaultdict

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embeds an mp4 file in the notebook."""
    video = open(filename,'rb').read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    for i in range(room_state.shape[0]):
        for j in range(room_state.shape[1]):
            if room_state[i][j] == 2:
                target = (i, j)

    distances = np.zeros(shape=room_state.shape)
    visited_cells = set()
    cell_queue = deque()

    visited_cells.add(target)
    cell_queue.appendleft(target)

    while len(cell_queue) != 0:
        cell = cell_queue.pop()
        distance = distances[cell[0]][cell[1]]
        for x,y in ((1,0), (-1,-0), (0,1), (0,-1)):
            next_cell_x, next_cell_y = cell[0]+x, cell[1]+y
            if room_state[next_cell_x][next_cell_y] != 0 and not (next_cell_x, next_cell_y) in visited_cells:
                distances[next_cell_x][next_cell_y] = distance + 1
                visited_cells.add((next_cell_x, next_cell_y))
                cell_queue.appendleft((next_cell_x, next_cell_y))
                
    return distances   

def calc_distances(room_state, distances):
    box = None
    mover = None
    for i in range(room_state.shape[0]):
        for j in range(room_state.shape[1]):            
            if room_state[i][j] == 4:
                box = (i,j)
            
            if room_state[i][j] == 5:
                mover = (i,j)
    
    return mover, box, distances[box[0]][box[1]]   

def box2target_change_reward(room_state, next_room_state, distances):
    if np.array_equal(room_state, next_room_state):
        return -1.0
    
    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)
    
    change_reward = 0.0
    if n_t2b < t2b:
        change_reward += 5.0
    elif n_t2b > t2b:
        change_reward -= 5.0
        
    m2b = np.sqrt((mover[0]-box[0])**2 + (mover[1]-box[1])**2)
    n_m2b = np.sqrt((n_mover[0]-n_box[0])**2 + (n_mover[1]-n_box[1])**2)
    
    if n_m2b < m2b and m2b >= 2:
        change_reward += 1.0
    elif n_m2b > m2b and n_m2b >= 2:
        change_reward -= 1.0
        
    return change_reward   

# Solution

In [8]:
class SOK_Agent:
    def __init__(self):
        # Construct DQN models
        self.state_size = (112,112,1) 
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8
        
        # Replay buffers
        self.replay_buffer = deque(maxlen=5000)
        self.prioritized_replay_buffer = deque(maxlen=500)
        
        # Hyperparameters
        self.gamma = 0.9
        self.epsilon = 1.0   
        self.epsilon_min = 0.3
        self.epsilon_decay = 0.995
        self.replay_rate = 10
        self.update_beta = 0.999

        self.verbosity = 100 

    def _build_model(self):
        model = Sequential()
        model.add(Conv2D(64, (16,16), strides=(16,16), input_shape=self.state_size, activation='relu'))
        model.add(Conv2D(64, (3,3), activation='relu'))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))       
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")        
        return model

    def remember(self, state, action, reward, next_state, done):
        self.replay_buffer.append([state, action, reward, next_state, done])    
        
    def copy_to_prioritized_buffer(self, n):
        for i in range(n):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])  

    def act(self, state, stochastic=False):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        
        act_values = self.model.predict(state, verbose=0)[0]
        
        if stochastic:
            act_probs = np.exp(act_values)/np.exp(act_values).sum()
            return np.random.choice(np.arange(self.action_size), size=1, p=act_probs)[0]
              
        return np.argmax(act_values) 

    def replay(self): 
        if len(self.replay_buffer) < self.batch_size:
            return
        
        if len(self.prioritized_replay_buffer) < self.batch_size//2:
            minibatch = random.sample(self.replay_buffer, self.batch_size) 
        else:    
            minibatch = random.sample(self.replay_buffer, self.batch_size//2) 
            minibatch.extend(random.sample(self.prioritized_replay_buffer, self.batch_size//2))
        
        states = np.zeros((self.batch_size, self.state_size[0], self.state_size[1]))
        actions = np.zeros(self.batch_size, dtype=int)
        rewards = np.zeros(self.batch_size)
        next_states = np.zeros((self.batch_size, self.state_size[0], self.state_size[1]))
        statuses = np.zeros(self.batch_size)
        targets = np.zeros((self.batch_size, self.action_size)) 
        
        for i, (state, action, reward, next_state, done) in enumerate(minibatch): 
            states[i] = state.copy()
            actions[i] = action
            rewards[i] = reward
            next_states[i] = next_state.copy()
            statuses[i] = 1 if done else 0    
        
        targets = self.model.predict(states) 
        max_actions = np.argmax(self.model.predict(next_states), axis=1)
        next_rewards = self.target_model.predict(next_states)
        
        ind = 0
        for action, reward, next_reward, max_action, done in zip(actions, rewards, next_rewards, max_actions, statuses):  
            if not done:
                reward += self.gamma * next_reward[max_action]
            targets[ind][action] = reward
            ind += 1
        
        self.model.fit(states, targets, epochs=10, verbose=0) 
        
        self.update_target_model()        
    
        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay
        
    def update_target_model(self):
        model_w = self.model.get_weights()
        target_model_w = self.target_model.get_weights()
        updated_target_model_w = []
        for i in range(len(model_w)):
            updated_target_model_w.append(self.update_beta*target_model_w[i] + (1-self.update_beta)*model_w[i])
        self.target_model.set_weights(updated_target_model_w)    
            
    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [9]:
def process_frame(frame):
    f = frame.mean(axis=2)
    f = f / 255
    return np.expand_dims(f, axis=0)

## Training

#### Test Suite

In [10]:
def test_agent(stochastic=False):
    current_epsilon = agent.epsilon
    agent.epsilon = 0.0
    num_solved = 0
    solved_in_steps = defaultdict(int)

    for t in tqdm(range(100)):    
        random.seed(t)
        sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
        sok.set_maxsteps(20)
        steps = 0

        state = sok.get_image('rgb_array')
        done = False
        while not done:
            steps += 1
            action = agent.act(process_frame(state), stochastic)
            if action < 4:
                action += 1
            else:
                action += 5
            state, reward, done, info = sok.step(action)

        if 3 in sok.room_state:            
            num_solved += 1
            solved_in_steps[steps] += 1
    
    agent.epsilon = current_epsilon
    print("*" * 30)
    print("Stochastic" if stochastic else "Deterministic")
    print("*" * 30)
    print("Solved: %d" % num_solved)
    print("=" * 30)
    print(solved_in_steps)
    print("*" * 30)

In [11]:
max_episodes = 50000
max_steps = 20

def init_sok(r):
    random.seed(r+100)
    sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    sok.set_maxsteps(max_steps)
    return sok

In [12]:
agent = SOK_Agent()

steps_per_episode = []

for e in range(max_episodes):
    if e % 100 == 0 and e > 0:
        test_agent(stochastic=False)
        
    print("Episode: %d" % (e))
    
    sok = init_sok(e)
    random.seed(e)
    
    state = process_frame(sok.get_image('rgb_array'))
    room_state = sok.room_state.copy() 
    distances = get_distances(room_state)
    
    for step in range(sok.max_steps):
        action = agent.act(state)
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1) 
        else:
            next_state, reward, done, _ = sok.step(action+5)         
        
        next_state = process_frame(next_state)        
        next_room_state = sok.room_state
        
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)
        
        agent.remember(state, action, reward, next_state, done)
        
        state = next_state.copy() 
        room_state = next_room_state.copy()                
        
        if (step+1) % agent.replay_rate == 0:
            agent.replay()            
        
        if done:   
            steps_per_episode.append(step+1)
            
            if 3 in sok.room_state:                
                print("SOLVED! Episode %d Steps: %d Epsilon %.4f" % (e, step+1, agent.epsilon)) 
                agent.copy_to_prioritized_buffer(step+1)
            
            #agent.save("exp1_episode%d.h5" % (e))            
            break

Episode: 0
Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
Episode: 7
SOLVED! Episode 7 Steps: 6 Epsilon 0.9322
Episode: 8
Episode: 9
Episode: 10
SOLVED! Episode 10 Steps: 13 Epsilon 0.9092
Episode: 11
Episode: 12
Episode: 13
SOLVED! Episode 13 Steps: 7 Epsilon 0.8911
Episode: 14
Episode: 15
Episode: 16
SOLVED! Episode 16 Steps: 19 Epsilon 0.8691
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
SOLVED! Episode 22 Steps: 13 Epsilon 0.8224
Episode: 23
Episode: 24
Episode: 25
Episode: 26
SOLVED! Episode 26 Steps: 4 Epsilon 0.7981
Episode: 27
Episode: 28
Episode: 29
SOLVED! Episode 29 Steps: 1 Epsilon 0.7822
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
Episode: 47
Episode: 48
SOLVED! Episode 48 Steps: 1 Epsilon 0.6531
Episode: 49
Episode: 50
Episode: 51
Episode: 52
Episode: 53
Episode: 54
Epis

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 23
defaultdict(<class 'int'>, {2: 4, 1: 15, 3: 4})
******************************
Episode: 100
Episode: 101
Episode: 102
Episode: 103
SOLVED! Episode 103 Steps: 3 Epsilon 0.4180
Episode: 104
Episode: 105
Episode: 106
Episode: 107
Episode: 108
Episode: 109
Episode: 110
Episode: 111
Episode: 112
Episode: 113
Episode: 114
SOLVED! Episode 114 Steps: 1 Epsilon 0.3782
Episode: 115
Episode: 116
Episode: 117
Episode: 118
SOLVED! Episode 118 Steps: 1 Epsilon 0.3670
Episode: 119
Episode: 120
Episode: 121
Episode: 122
Episode: 123
Episode: 124
Episode: 125
SOLVED! Episode 125 Steps: 2 Epsilon 0.3455
Episode: 126
Episode: 127
Episode: 128
Episode: 129
SOLVED! Episode 129 Steps: 1 Epsilon 0.3353
Episode: 130
Episode: 131
Episode: 132
Episode: 133
SOLVED! Episode 133 Steps: 13 Epsilon 0.3237
Episode: 134
Episode: 135
Episode: 136
Episode: 137
Episode: 138
Episode: 139
Episode: 140
Episode: 141
SOLVED! Episode 141 St

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 9, 1: 16, 2: 5})
******************************
Episode: 200
SOLVED! Episode 200 Steps: 13 Epsilon 0.2988
Episode: 201
Episode: 202
Episode: 203
Episode: 204
Episode: 205
Episode: 206
Episode: 207
Episode: 208
Episode: 209
SOLVED! Episode 209 Steps: 2 Epsilon 0.2988
Episode: 210
Episode: 211
Episode: 212
SOLVED! Episode 212 Steps: 3 Epsilon 0.2988
Episode: 213
Episode: 214
Episode: 215
Episode: 216
Episode: 217
SOLVED! Episode 217 Steps: 3 Epsilon 0.2988
Episode: 218
SOLVED! Episode 218 Steps: 2 Epsilon 0.2988
Episode: 219
SOLVED! Episode 219 Steps: 2 Epsilon 0.2988
Episode: 220
SOLVED! Episode 220 Steps: 6 Epsilon 0.2988
Episode: 221
SOLVED! Episode 221 Steps: 1 Epsilon 0.2988
Episode: 222
Episode: 223
Episode: 224
Episode: 225
SOLVED! Episode 225 Steps: 1 Epsilon 0.2988
Episode: 226
Episode: 227
Episode: 228
Episode: 229
Episode: 230
SOLVED! Episode 230 Steps: 1 Epsi

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 41
defaultdict(<class 'int'>, {3: 15, 1: 17, 2: 9})
******************************
Episode: 300
SOLVED! Episode 300 Steps: 1 Epsilon 0.2988
Episode: 301
Episode: 302
Episode: 303
Episode: 304
Episode: 305
SOLVED! Episode 305 Steps: 2 Epsilon 0.2988
Episode: 306
Episode: 307
Episode: 308
Episode: 309
Episode: 310
Episode: 311
Episode: 312
Episode: 313
Episode: 314
Episode: 315
SOLVED! Episode 315 Steps: 3 Epsilon 0.2988
Episode: 316
Episode: 317
Episode: 318
SOLVED! Episode 318 Steps: 2 Epsilon 0.2988
Episode: 319
Episode: 320
Episode: 321
Episode: 322
SOLVED! Episode 322 Steps: 13 Epsilon 0.2988
Episode: 323
Episode: 324
SOLVED! Episode 324 Steps: 1 Epsilon 0.2988
Episode: 325
Episode: 326
Episode: 327
SOLVED! Episode 327 Steps: 2 Epsilon 0.2988
Episode: 328
Episode: 329
Episode: 330
Episode: 331
Episode: 332
Episode: 333
Episode: 334
SOLVED! Episode 334 Steps: 1 Epsilon 0.2988
Episode: 335
Episode: 33

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 49
defaultdict(<class 'int'>, {3: 21, 1: 20, 2: 8})
******************************
Episode: 400
SOLVED! Episode 400 Steps: 2 Epsilon 0.2988
Episode: 401
Episode: 402
Episode: 403
SOLVED! Episode 403 Steps: 3 Epsilon 0.2988
Episode: 404
SOLVED! Episode 404 Steps: 3 Epsilon 0.2988
Episode: 405
SOLVED! Episode 405 Steps: 1 Epsilon 0.2988
Episode: 406
SOLVED! Episode 406 Steps: 1 Epsilon 0.2988
Episode: 407
Episode: 408
Episode: 409
Episode: 410
SOLVED! Episode 410 Steps: 2 Epsilon 0.2988
Episode: 411
Episode: 412
Episode: 413
SOLVED! Episode 413 Steps: 3 Epsilon 0.2988
Episode: 414
Episode: 415
SOLVED! Episode 415 Steps: 13 Epsilon 0.2988
Episode: 416
SOLVED! Episode 416 Steps: 3 Epsilon 0.2988
Episode: 417
Episode: 418
Episode: 419
Episode: 420
Episode: 421
Episode: 422
Episode: 423
SOLVED! Episode 423 Steps: 1 Epsilon 0.2988
Episode: 424
Episode: 425
Episode: 426
Episode: 427
SOLVED! Episode 427 Steps: 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 53
defaultdict(<class 'int'>, {3: 23, 1: 19, 2: 10, 5: 1})
******************************
Episode: 500
SOLVED! Episode 500 Steps: 2 Epsilon 0.2988
Episode: 501
Episode: 502
Episode: 503
SOLVED! Episode 503 Steps: 3 Epsilon 0.2988
Episode: 504
Episode: 505
SOLVED! Episode 505 Steps: 3 Epsilon 0.2988
Episode: 506
Episode: 507
Episode: 508
Episode: 509
SOLVED! Episode 509 Steps: 2 Epsilon 0.2988
Episode: 510
SOLVED! Episode 510 Steps: 1 Epsilon 0.2988
Episode: 511
SOLVED! Episode 511 Steps: 2 Epsilon 0.2988
Episode: 512
SOLVED! Episode 512 Steps: 1 Epsilon 0.2988
Episode: 513
Episode: 514
Episode: 515
Episode: 516
Episode: 517
SOLVED! Episode 517 Steps: 1 Epsilon 0.2988
Episode: 518
Episode: 519
SOLVED! Episode 519 Steps: 2 Epsilon 0.2988
Episode: 520
SOLVED! Episode 520 Steps: 1 Epsilon 0.2988
Episode: 521
Episode: 522
SOLVED! Episode 522 Steps: 3 Epsilon 0.2988
Episode: 523
SOLVED! Episode 523 Steps: 3 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 54
defaultdict(<class 'int'>, {3: 24, 1: 19, 2: 11})
******************************
Episode: 600
SOLVED! Episode 600 Steps: 3 Epsilon 0.2988
Episode: 601
Episode: 602
SOLVED! Episode 602 Steps: 1 Epsilon 0.2988
Episode: 603
Episode: 604
Episode: 605
Episode: 606
SOLVED! Episode 606 Steps: 2 Epsilon 0.2988
Episode: 607
SOLVED! Episode 607 Steps: 1 Epsilon 0.2988
Episode: 608
SOLVED! Episode 608 Steps: 1 Epsilon 0.2988
Episode: 609
Episode: 610
Episode: 611
SOLVED! Episode 611 Steps: 15 Epsilon 0.2988
Episode: 612
Episode: 613
SOLVED! Episode 613 Steps: 1 Epsilon 0.2988
Episode: 614
Episode: 615
SOLVED! Episode 615 Steps: 2 Epsilon 0.2988
Episode: 616
SOLVED! Episode 616 Steps: 12 Epsilon 0.2988
Episode: 617
Episode: 618
Episode: 619
Episode: 620
SOLVED! Episode 620 Steps: 3 Epsilon 0.2988
Episode: 621
Episode: 622
Episode: 623
SOLVED! Episode 623 Steps: 2 Epsilon 0.2988
Episode: 624
Episode: 625
SOLVED!

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 57
defaultdict(<class 'int'>, {3: 24, 1: 21, 2: 11, 5: 1})
******************************
Episode: 700
Episode: 701
Episode: 702
SOLVED! Episode 702 Steps: 1 Epsilon 0.2988
Episode: 703
Episode: 704
Episode: 705
SOLVED! Episode 705 Steps: 1 Epsilon 0.2988
Episode: 706
Episode: 707
Episode: 708
Episode: 709
Episode: 710
SOLVED! Episode 710 Steps: 1 Epsilon 0.2988
Episode: 711
SOLVED! Episode 711 Steps: 3 Epsilon 0.2988
Episode: 712
Episode: 713
Episode: 714
SOLVED! Episode 714 Steps: 4 Epsilon 0.2988
Episode: 715
Episode: 716
Episode: 717
Episode: 718
SOLVED! Episode 718 Steps: 3 Epsilon 0.2988
Episode: 719
Episode: 720
Episode: 721
Episode: 722
Episode: 723
Episode: 724
Episode: 725
SOLVED! Episode 725 Steps: 3 Epsilon 0.2988
Episode: 726
Episode: 727
Episode: 728
SOLVED! Episode 728 Steps: 1 Epsilon 0.2988
Episode: 729
Episode: 730
Episode: 731
Episode: 732
SOLVED! Episode 732 Steps: 3 Epsilon 0.2988


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 56
defaultdict(<class 'int'>, {3: 24, 1: 21, 2: 11})
******************************
Episode: 800
SOLVED! Episode 800 Steps: 2 Epsilon 0.2988
Episode: 801
Episode: 802
Episode: 803
Episode: 804
Episode: 805
SOLVED! Episode 805 Steps: 3 Epsilon 0.2988
Episode: 806
SOLVED! Episode 806 Steps: 3 Epsilon 0.2988
Episode: 807
Episode: 808
Episode: 809
Episode: 810
SOLVED! Episode 810 Steps: 3 Epsilon 0.2988
Episode: 811
Episode: 812
Episode: 813
SOLVED! Episode 813 Steps: 1 Epsilon 0.2988
Episode: 814
Episode: 815
Episode: 816
Episode: 817
Episode: 818
Episode: 819
Episode: 820
Episode: 821
SOLVED! Episode 821 Steps: 1 Epsilon 0.2988
Episode: 822
Episode: 823
SOLVED! Episode 823 Steps: 2 Epsilon 0.2988
Episode: 824
Episode: 825
Episode: 826
SOLVED! Episode 826 Steps: 1 Epsilon 0.2988
Episode: 827
SOLVED! Episode 827 Steps: 3 Epsilon 0.2988
Episode: 828
SOLVED! Episode 828 Steps: 2 Epsilon 0.2988
Episode: 829
S

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 61
defaultdict(<class 'int'>, {3: 23, 1: 21, 7: 1, 6: 2, 2: 11, 5: 3})
******************************
Episode: 900
Episode: 901
Episode: 902
SOLVED! Episode 902 Steps: 3 Epsilon 0.2988
Episode: 903
SOLVED! Episode 903 Steps: 16 Epsilon 0.2988
Episode: 904
Episode: 905
Episode: 906
SOLVED! Episode 906 Steps: 2 Epsilon 0.2988
Episode: 907
Episode: 908
Episode: 909
SOLVED! Episode 909 Steps: 1 Epsilon 0.2988
Episode: 910
Episode: 911
SOLVED! Episode 911 Steps: 2 Epsilon 0.2988
Episode: 912
Episode: 913
Episode: 914
Episode: 915
Episode: 916
Episode: 917
Episode: 918
Episode: 919
Episode: 920
SOLVED! Episode 920 Steps: 2 Epsilon 0.2988
Episode: 921
SOLVED! Episode 921 Steps: 3 Epsilon 0.2988
Episode: 922
SOLVED! Episode 922 Steps: 3 Epsilon 0.2988
Episode: 923
SOLVED! Episode 923 Steps: 1 Epsilon 0.2988
Episode: 924
Episode: 925
Episode: 926
SOLVED! Episode 926 Steps: 1 Epsilon 0.2988
Episode: 927
Episode:

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 57
defaultdict(<class 'int'>, {3: 23, 1: 21, 2: 11, 5: 2})
******************************
Episode: 1000
SOLVED! Episode 1000 Steps: 3 Epsilon 0.2988
Episode: 1001
SOLVED! Episode 1001 Steps: 3 Epsilon 0.2988
Episode: 1002
Episode: 1003
Episode: 1004
Episode: 1005
SOLVED! Episode 1005 Steps: 2 Epsilon 0.2988
Episode: 1006
Episode: 1007
Episode: 1008
Episode: 1009
Episode: 1010
SOLVED! Episode 1010 Steps: 2 Epsilon 0.2988
Episode: 1011
Episode: 1012
SOLVED! Episode 1012 Steps: 4 Epsilon 0.2988
Episode: 1013
SOLVED! Episode 1013 Steps: 3 Epsilon 0.2988
Episode: 1014
SOLVED! Episode 1014 Steps: 1 Epsilon 0.2988
Episode: 1015
Episode: 1016
SOLVED! Episode 1016 Steps: 3 Epsilon 0.2988
Episode: 1017
SOLVED! Episode 1017 Steps: 2 Epsilon 0.2988
Episode: 1018
Episode: 1019
SOLVED! Episode 1019 Steps: 16 Epsilon 0.2988
Episode: 1020
Episode: 1021
Episode: 1022
Episode: 1023
Episode: 1024
SOLVED! Episode 1024 Ste

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 56
defaultdict(<class 'int'>, {3: 23, 1: 21, 2: 11, 7: 1})
******************************
Episode: 1100
Episode: 1101
SOLVED! Episode 1101 Steps: 1 Epsilon 0.2988
Episode: 1102
Episode: 1103
SOLVED! Episode 1103 Steps: 2 Epsilon 0.2988
Episode: 1104
Episode: 1105
SOLVED! Episode 1105 Steps: 3 Epsilon 0.2988
Episode: 1106
SOLVED! Episode 1106 Steps: 2 Epsilon 0.2988
Episode: 1107
Episode: 1108
SOLVED! Episode 1108 Steps: 3 Epsilon 0.2988
Episode: 1109
Episode: 1110
Episode: 1111
Episode: 1112
SOLVED! Episode 1112 Steps: 3 Epsilon 0.2988
Episode: 1113
Episode: 1114
Episode: 1115
Episode: 1116
SOLVED! Episode 1116 Steps: 1 Epsilon 0.2988
Episode: 1117
Episode: 1118
SOLVED! Episode 1118 Steps: 1 Epsilon 0.2988
Episode: 1119
SOLVED! Episode 1119 Steps: 3 Epsilon 0.2988
Episode: 1120
SOLVED! Episode 1120 Steps: 1 Epsilon 0.2988
Episode: 1121
SOLVED! Episode 1121 Steps: 1 Epsilon 0.2988
Episode: 1122
Episode:

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 55
defaultdict(<class 'int'>, {3: 23, 1: 20, 2: 10, 5: 2})
******************************
Episode: 1200
Episode: 1201
Episode: 1202
Episode: 1203
Episode: 1204
Episode: 1205
SOLVED! Episode 1205 Steps: 1 Epsilon 0.2988
Episode: 1206
SOLVED! Episode 1206 Steps: 3 Epsilon 0.2988
Episode: 1207
SOLVED! Episode 1207 Steps: 5 Epsilon 0.2988
Episode: 1208
Episode: 1209
Episode: 1210
SOLVED! Episode 1210 Steps: 3 Epsilon 0.2988
Episode: 1211
SOLVED! Episode 1211 Steps: 3 Epsilon 0.2988
Episode: 1212
SOLVED! Episode 1212 Steps: 1 Epsilon 0.2988
Episode: 1213
Episode: 1214
Episode: 1215
Episode: 1216
Episode: 1217
SOLVED! Episode 1217 Steps: 3 Epsilon 0.2988
Episode: 1218
SOLVED! Episode 1218 Steps: 1 Epsilon 0.2988
Episode: 1219
SOLVED! Episode 1219 Steps: 3 Epsilon 0.2988
Episode: 1220
Episode: 1221
SOLVED! Episode 1221 Steps: 2 Epsilon 0.2988
Episode: 1222
SOLVED! Episode 1222 Steps: 3 Epsilon 0.2988
Episode:

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 54
defaultdict(<class 'int'>, {3: 24, 1: 20, 2: 10})
******************************
Episode: 1300
Episode: 1301
Episode: 1302
Episode: 1303
Episode: 1304
SOLVED! Episode 1304 Steps: 1 Epsilon 0.2988
Episode: 1305
Episode: 1306
SOLVED! Episode 1306 Steps: 3 Epsilon 0.2988
Episode: 1307
SOLVED! Episode 1307 Steps: 16 Epsilon 0.2988
Episode: 1308
Episode: 1309
Episode: 1310
SOLVED! Episode 1310 Steps: 3 Epsilon 0.2988
Episode: 1311
Episode: 1312
Episode: 1313
SOLVED! Episode 1313 Steps: 1 Epsilon 0.2988
Episode: 1314
Episode: 1315
Episode: 1316
Episode: 1317
Episode: 1318
SOLVED! Episode 1318 Steps: 3 Epsilon 0.2988
Episode: 1319
SOLVED! Episode 1319 Steps: 2 Epsilon 0.2988
Episode: 1320
Episode: 1321
Episode: 1322
SOLVED! Episode 1322 Steps: 3 Epsilon 0.2988
Episode: 1323
Episode: 1324
Episode: 1325
SOLVED! Episode 1325 Steps: 2 Epsilon 0.2988
Episode: 1326
Episode: 1327
SOLVED! Episode 1327 Steps: 3 Eps

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 57
defaultdict(<class 'int'>, {3: 25, 1: 20, 2: 10, 5: 2})
******************************
Episode: 1400
SOLVED! Episode 1400 Steps: 1 Epsilon 0.2988
Episode: 1401
SOLVED! Episode 1401 Steps: 1 Epsilon 0.2988
Episode: 1402
SOLVED! Episode 1402 Steps: 3 Epsilon 0.2988
Episode: 1403
SOLVED! Episode 1403 Steps: 2 Epsilon 0.2988
Episode: 1404
Episode: 1405
SOLVED! Episode 1405 Steps: 3 Epsilon 0.2988
Episode: 1406
Episode: 1407
Episode: 1408
Episode: 1409
Episode: 1410
Episode: 1411
Episode: 1412
SOLVED! Episode 1412 Steps: 1 Epsilon 0.2988
Episode: 1413
Episode: 1414
SOLVED! Episode 1414 Steps: 2 Epsilon 0.2988
Episode: 1415
SOLVED! Episode 1415 Steps: 2 Epsilon 0.2988
Episode: 1416
Episode: 1417
SOLVED! Episode 1417 Steps: 2 Epsilon 0.2988
Episode: 1418
Episode: 1419
SOLVED! Episode 1419 Steps: 2 Epsilon 0.2988
Episode: 1420
SOLVED! Episode 1420 Steps: 5 Epsilon 0.2988
Episode: 1421
SOLVED! Episode 1421 S

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 59
defaultdict(<class 'int'>, {3: 25, 1: 20, 6: 2, 2: 10, 5: 2})
******************************
Episode: 1500
SOLVED! Episode 1500 Steps: 3 Epsilon 0.2988
Episode: 1501
Episode: 1502
Episode: 1503
SOLVED! Episode 1503 Steps: 1 Epsilon 0.2988
Episode: 1504
SOLVED! Episode 1504 Steps: 2 Epsilon 0.2988
Episode: 1505
SOLVED! Episode 1505 Steps: 3 Epsilon 0.2988
Episode: 1506
SOLVED! Episode 1506 Steps: 11 Epsilon 0.2988
Episode: 1507
Episode: 1508
SOLVED! Episode 1508 Steps: 3 Epsilon 0.2988
Episode: 1509
Episode: 1510
SOLVED! Episode 1510 Steps: 1 Epsilon 0.2988
Episode: 1511
Episode: 1512
Episode: 1513
SOLVED! Episode 1513 Steps: 2 Epsilon 0.2988
Episode: 1514
SOLVED! Episode 1514 Steps: 1 Epsilon 0.2988
Episode: 1515
Episode: 1516
Episode: 1517
SOLVED! Episode 1517 Steps: 3 Epsilon 0.2988
Episode: 1518
SOLVED! Episode 1518 Steps: 1 Epsilon 0.2988
Episode: 1519
SOLVED! Episode 1519 Steps: 2 Epsilon 0.298

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 61
defaultdict(<class 'int'>, {3: 25, 1: 20, 6: 4, 2: 10, 5: 2})
******************************
Episode: 1600
SOLVED! Episode 1600 Steps: 2 Epsilon 0.2988
Episode: 1601
SOLVED! Episode 1601 Steps: 1 Epsilon 0.2988
Episode: 1602
SOLVED! Episode 1602 Steps: 3 Epsilon 0.2988
Episode: 1603
SOLVED! Episode 1603 Steps: 3 Epsilon 0.2988
Episode: 1604
Episode: 1605
SOLVED! Episode 1605 Steps: 1 Epsilon 0.2988
Episode: 1606
Episode: 1607
SOLVED! Episode 1607 Steps: 3 Epsilon 0.2988
Episode: 1608
Episode: 1609
SOLVED! Episode 1609 Steps: 3 Epsilon 0.2988
Episode: 1610
Episode: 1611
SOLVED! Episode 1611 Steps: 2 Epsilon 0.2988
Episode: 1612
SOLVED! Episode 1612 Steps: 1 Epsilon 0.2988
Episode: 1613
Episode: 1614
SOLVED! Episode 1614 Steps: 3 Epsilon 0.2988
Episode: 1615
SOLVED! Episode 1615 Steps: 2 Epsilon 0.2988
Episode: 1616
Episode: 1617
Episode: 1618
SOLVED! Episode 1618 Steps: 3 Epsilon 0.2988
Episode: 1619

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 60
defaultdict(<class 'int'>, {3: 25, 1: 20, 6: 2, 2: 10, 5: 2, 7: 1})
******************************
Episode: 1700
SOLVED! Episode 1700 Steps: 2 Epsilon 0.2988
Episode: 1701
Episode: 1702
Episode: 1703
Episode: 1704
SOLVED! Episode 1704 Steps: 1 Epsilon 0.2988
Episode: 1705
Episode: 1706
SOLVED! Episode 1706 Steps: 1 Epsilon 0.2988
Episode: 1707
SOLVED! Episode 1707 Steps: 1 Epsilon 0.2988
Episode: 1708
Episode: 1709
SOLVED! Episode 1709 Steps: 3 Epsilon 0.2988
Episode: 1710
Episode: 1711
SOLVED! Episode 1711 Steps: 2 Epsilon 0.2988
Episode: 1712
Episode: 1713
Episode: 1714
SOLVED! Episode 1714 Steps: 2 Epsilon 0.2988
Episode: 1715
SOLVED! Episode 1715 Steps: 3 Epsilon 0.2988
Episode: 1716
SOLVED! Episode 1716 Steps: 3 Epsilon 0.2988
Episode: 1717
Episode: 1718
SOLVED! Episode 1718 Steps: 2 Epsilon 0.2988
Episode: 1719
Episode: 1720
SOLVED! Episode 1720 Steps: 2 Epsilon 0.2988
Episode: 1721
Episode: 1

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 58
defaultdict(<class 'int'>, {3: 25, 1: 20, 6: 2, 2: 9, 5: 2})
******************************
Episode: 1800
SOLVED! Episode 1800 Steps: 1 Epsilon 0.2988
Episode: 1801
SOLVED! Episode 1801 Steps: 6 Epsilon 0.2988
Episode: 1802
Episode: 1803
Episode: 1804
Episode: 1805
Episode: 1806
Episode: 1807
SOLVED! Episode 1807 Steps: 2 Epsilon 0.2988
Episode: 1808
SOLVED! Episode 1808 Steps: 3 Epsilon 0.2988
Episode: 1809
Episode: 1810
Episode: 1811
Episode: 1812
SOLVED! Episode 1812 Steps: 2 Epsilon 0.2988
Episode: 1813
SOLVED! Episode 1813 Steps: 3 Epsilon 0.2988
Episode: 1814
SOLVED! Episode 1814 Steps: 3 Epsilon 0.2988
Episode: 1815
Episode: 1816
SOLVED! Episode 1816 Steps: 1 Epsilon 0.2988
Episode: 1817
SOLVED! Episode 1817 Steps: 2 Epsilon 0.2988
Episode: 1818
Episode: 1819
SOLVED! Episode 1819 Steps: 2 Epsilon 0.2988
Episode: 1820
SOLVED! Episode 1820 Steps: 3 Epsilon 0.2988
Episode: 1821
SOLVED! Episode 1

KeyboardInterrupt: 