# Final Project - Reinforcement Learning
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time

import numpy as np
import scipy as scp
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Dense

from tqdm.notebook import tqdm
from collections import defaultdict

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embed an mp4 file in the notebook as an inline HTML5 video.

    The file is read in full, base64-encoded and placed in a ``data:`` URI
    inside a ``<video>`` tag, so the video is stored in the cell output.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        The ``HTML`` display object wrapping the generated ``<video>`` tag.
    """
    # A context manager closes the handle deterministically instead of
    # leaving it open until the garbage collector gets to it.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    """Compute the walking distance from the target cell to every reachable cell.

    Runs a breadth-first search over the grid, starting at the cell whose
    value is 2 and moving in the four cardinal directions. Cells whose value
    is 0 are never entered.

    Args:
        room_state: 2-D numpy array encoding the room. Only the values 2
            (search origin) and 0 (blocked) are interpreted here.

    Returns:
        A float array with the same shape as ``room_state``. Each reachable
        cell holds its number of steps from the target. The target itself,
        blocked cells and unreachable cells all hold 0.

    Raises:
        ValueError: If ``room_state`` contains no cell equal to 2.
    """
    target_cells = np.argwhere(room_state == 2)
    if len(target_cells) == 0:
        # Previously this surfaced as an UnboundLocalError further down.
        raise ValueError("room_state contains no target cell (value 2)")
    # With several candidates keep the last one in row-major order, which is
    # what the original nested scan ended up with.
    target = tuple(int(v) for v in target_cells[-1])

    n_rows, n_cols = room_state.shape[0], room_state.shape[1]
    distances = np.zeros(shape=room_state.shape)
    visited_cells = {target}
    cell_queue = deque([target])

    while cell_queue:
        row, col = cell_queue.popleft()
        distance = distances[row][col]
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            next_cell = (row + d_row, col + d_col)
            # Explicit bounds check: a negative index would silently wrap
            # around to the opposite edge, and a too-large one would raise.
            if not (0 <= next_cell[0] < n_rows and 0 <= next_cell[1] < n_cols):
                continue
            if room_state[next_cell[0]][next_cell[1]] == 0 or next_cell in visited_cells:
                continue
            distances[next_cell[0]][next_cell[1]] = distance + 1
            visited_cells.add(next_cell)
            cell_queue.append(next_cell)

    return distances

def calc_distances(room_state, distances):
    """Locate the mover and the box, and look up the box's distance value.

    Args:
        room_state: 2-D numpy array encoding the room; a cell equal to 5 is
            taken as the mover and a cell equal to 4 as the box.
        distances: 2-D array indexed like ``room_state`` (for example the
            result of a distance-map computation).

    Returns:
        A tuple ``(mover, box, box_distance)``. ``mover`` and ``box`` are
        ``(row, col)`` tuples, and ``mover`` is ``None`` when no cell equals 5.
        ``box_distance`` is ``distances`` evaluated at the box position.

    Raises:
        TypeError: If no cell equals 4, because the distance lookup then
            indexes ``None``.
    """
    def _last_cell_equal_to(code):
        # np.argwhere lists matches in row-major order; the final entry is
        # the one a full top-to-bottom, left-to-right scan would end on.
        matches = np.argwhere(room_state == code)
        if len(matches) == 0:
            return None
        row, col = matches[-1]
        return (int(row), int(col))

    mover = _last_cell_equal_to(5)
    box = _last_cell_equal_to(4)

    return mover, box, distances[box[0]][box[1]]

def box2target_change_reward(room_state, next_room_state, distances):
    """Shaping reward for one transition, based on box and mover movement.

    Args:
        room_state: Room grid before the step.
        next_room_state: Room grid after the step.
        distances: Distance map passed through to ``calc_distances`` to look
            up the box's distance value in both grids.

    Returns:
        ``-1.0`` when the two grids are identical. Otherwise the sum of:
        * ``+5.0`` / ``-5.0`` when the box's distance value decreased /
          increased (``0.0`` when unchanged), and
        * ``+1.0`` when the mover got closer to the box (straight-line
          distance) while it had been at least 2 away, ``-1.0`` when it got
          farther and is now at least 2 away, ``0.0`` otherwise.
    """
    # A step that changed nothing on the grid gets a flat penalty.
    if np.array_equal(room_state, next_room_state):
        return -1.0

    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)

    def _straight_line(a, b):
        return np.sqrt((a[0]-b[0])**2 + (a[1]-b[1])**2)

    # Box component: compare the box's distance value before and after.
    if n_t2b < t2b:
        box_term = 5.0
    elif n_t2b > t2b:
        box_term = -5.0
    else:
        box_term = 0.0

    # Mover component: straight-line gap between mover and box.
    gap_before = _straight_line(mover, box)
    gap_after = _straight_line(n_mover, n_box)

    if gap_after < gap_before and gap_before >= 2:
        mover_term = 1.0
    elif gap_after > gap_before and gap_after >= 2:
        mover_term = -1.0
    else:
        mover_term = 0.0

    return box_term + mover_term

# Solution

In [13]:
class SOK_Agent:
    """DQN agent with a soft-updated target network and two replay buffers.

    The online network selects the best next action and the target network
    evaluates it (Double-DQN style target). The target network tracks the
    online one through an exponential moving average of the weights.

    Two buffers feed training: ``replay_buffer`` holds every remembered
    transition, while ``prioritized_replay_buffer`` holds transitions copied
    over on request via ``copy_to_prioritized_buffer``. Once the latter is
    large enough, half of each minibatch is drawn from it.
    """

    def __init__(self):
        # Construct DQN models
        self.state_size = (25,)
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8

        # Replay buffers
        self.replay_buffer = deque(maxlen=5000)
        self.prioritized_replay_buffer = deque(maxlen=500)
        self.prioritized_replay_batch = 50

        # Hyperparameters
        self.gamma = 0.9            # discount factor for bootstrapped targets
        self.epsilon = 1.0          # current exploration probability
        self.epsilon_min = 0.3      # decay stops once epsilon is at or below this
        self.epsilon_decay = 0.995  # multiplicative decay applied per replay()
        self.replay_rate = 10       # read by the training loop, not by this class
        self.update_beta = 0.999    # weight kept from the old target network

        self.verbosity = 100

    def _build_model(self):
        """Build the Q-network: one 512-unit ReLU layer and a linear output per action."""
        model = Sequential()
        model.add(Dense(512, input_shape=self.state_size, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the main replay buffer."""
        self.replay_buffer.append([state, action, reward, next_state, done])

    def copy_to_prioritized_buffer(self, n):
        """Copy the ``n`` most recent transitions into the prioritized buffer.

        Transitions are copied newest first. ``n`` is clamped to the number
        of stored transitions, so asking for more than is available copies
        everything instead of raising IndexError.
        """
        for i in range(min(n, len(self.replay_buffer))):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])

    def act(self, state, stochastic=False):
        """Choose an action for ``state``.

        Args:
            state: Network input for a single state, with a leading batch
                axis of size 1.
            stochastic: When True, sample from a softmax over the predicted
                Q-values instead of taking the argmax.

        Returns:
            The index of the chosen action, in ``range(self.action_size)``.
        """
        # np.random.rand() lies in [0, 1), so a strict comparison gives
        # exactly "never explore" at epsilon == 0 and "always" at epsilon == 1.
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state, verbose=0)[0]

        if stochastic:
            # Numerically stable softmax in float64: shifting by the maximum
            # prevents exp() overflow (inf/inf -> NaN) on large Q-values, and
            # the wider dtype keeps the probabilities summing to 1 within the
            # tolerance np.random.choice enforces.
            shifted = np.asarray(act_values, dtype=np.float64)
            shifted = shifted - shifted.max()
            weights = np.exp(shifted)
            act_probs = weights / weights.sum()
            return np.random.choice(np.arange(self.action_size), size=1, p=act_probs)[0]

        return np.argmax(act_values)

    def replay(self):
        """Train on one minibatch, then soft-update the target and decay epsilon.

        Does nothing until the main buffer holds at least ``batch_size``
        transitions.
        """
        if len(self.replay_buffer) < self.batch_size:
            return

        prioritized_share = self.batch_size // 2
        if len(self.prioritized_replay_buffer) < prioritized_share:
            minibatch = random.sample(self.replay_buffer, self.batch_size)
        else:
            # Draw the remainder (not another floor-half) from the main
            # buffer so an odd batch_size still yields a full batch rather
            # than leaving an all-zero row in the training arrays.
            minibatch = random.sample(self.replay_buffer, self.batch_size - prioritized_share)
            minibatch.extend(random.sample(self.prioritized_replay_buffer, prioritized_share))

        states = np.zeros((self.batch_size, self.state_size[0]))
        actions = np.zeros(self.batch_size, dtype=int)
        rewards = np.zeros(self.batch_size)
        next_states = np.zeros((self.batch_size, self.state_size[0]))
        statuses = np.zeros(self.batch_size)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            states[i] = np.ravel(state)
            actions[i] = action
            rewards[i] = reward
            next_states[i] = np.ravel(next_state)
            statuses[i] = 1 if done else 0

        # verbose=0 keeps Keras from printing a progress bar per call,
        # matching what act() already does.
        targets = self.model.predict(states, verbose=0)
        max_actions = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_rewards = self.target_model.predict(next_states, verbose=0)

        # Online network picks the next action, target network scores it;
        # terminal transitions get no bootstrapped term.
        rows = np.arange(self.batch_size)
        bootstrapped = self.gamma * next_rewards[rows, max_actions] * (1.0 - statuses)
        targets[rows, actions] = rewards + bootstrapped

        self.model.fit(states, targets, epochs=10, verbose=0)

        self.update_target_model()

        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay

    def update_target_model(self):
        """Move the target weights towards the online weights by ``1 - update_beta``."""
        blended = [
            self.update_beta * target_w + (1 - self.update_beta) * online_w
            for online_w, target_w in zip(self.model.get_weights(), self.target_model.get_weights())
        ]
        self.target_model.set_weights(blended)

    def load(self, name):
        """Load online-network weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save online-network weights to ``name``."""
        self.model.save_weights(name)

In [14]:
def process_frame(frame):
    """Reduce a rendered frame to a single row of 25 normalised features.

    Takes channel 0 of the pixel window ``[16:96, 16:96]``, splits that
    80x80 window into a 5x5 grid of 16x16 tiles, keeps the maximum of each
    tile and scales the result by 1/255.

    Args:
        frame: Image array indexed ``[row, col, channel]`` and at least
            96x96 pixels. Presumably the 112x112 render of a 7x7 room with
            the outer ring of tiles cropped away (verify against the env).

    Returns:
        Array of shape ``(1, 25)``: the tile maxima in row-major order,
        divided by 255, with a leading batch axis.
    """
    inner_channel0 = frame[16:96, 16:96, 0]
    # Axes become (tile_row, pixel_row, tile_col, pixel_col).
    tiled = inner_channel0.reshape(5, 16, 5, 16)
    tile_peaks = tiled.max(axis=(1, 3))
    features = tile_peaks.flatten() / 255
    return features[np.newaxis, :]

## Training

#### Test Suite

In [15]:
def test_agent(stochastic=False):
    current_epsilon = agent.epsilon
    agent.epsilon = 0.0
    num_solved = 0
    solved_in_steps = defaultdict(int)

    for t in tqdm(range(100)):    
        random.seed(t)
        sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
        sok.set_maxsteps(20)
        steps = 0

        state = sok.get_image('rgb_array')
        done = False
        while not done:
            steps += 1
            action = agent.act(process_frame(state), stochastic)
            if action < 4:
                action += 1
            else:
                action += 5
            state, reward, done, info = sok.step(action)

        if 3 in sok.room_state:            
            num_solved += 1
            solved_in_steps[steps] += 1
    
    agent.epsilon = current_epsilon
    print("*" * 30)
    print("Stochastic" if stochastic else "Deterministic")
    print("*" * 30)
    print("Solved: %d" % num_solved)
    print("=" * 30)
    print(solved_in_steps)
    print("*" * 30)

In [16]:
max_episodes = 5000
max_steps = 100

def init_sok(r):
    """Create a 7x7 single-box push-and-pull Sokoban environment for index ``r``.

    ``random`` is seeded with ``r + 100`` immediately before the environment
    is constructed (presumably so level generation is reproducible per index;
    this relies on the env drawing from ``random``). The step limit is taken
    from the module-level ``max_steps``.

    Args:
        r: Integer index (the episode number in the training loop).

    Returns:
        The configured ``PushAndPullSokobanEnv`` instance.
    """
    level_seed = r + 100
    random.seed(level_seed)

    env = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    env.set_maxsteps(max_steps)
    return env

In [17]:
# Training script: one freshly built level per episode, transitions stored in
# the agent's replay buffer, periodic replay-based training and periodic
# evaluation.
agent = SOK_Agent()

# Number of steps each finished episode took (appended when `done` is seen).
steps_per_episode = []

for e in range(max_episodes):
    # Before every 100th episode, evaluate the current policy twice: once
    # with greedy action selection and once with softmax sampling.
    if (e+1) % 100 == 0:
        test_agent(stochastic=False)
        test_agent(stochastic=True)

    print("Episode: %d" % (e))

    # init_sok seeds `random` with e+100 before building the level; the
    # re-seed below then fixes the `random` stream used during the episode.
    sok = init_sok(e)
    random.seed(e)

    state = process_frame(sok.get_image('rgb_array'))
    room_state = sok.room_state.copy() 
    # Distance map is computed once, from the initial grid of the episode.
    distances = get_distances(room_state)

    for step in range(sok.max_steps):
        action = agent.act(state)
        # Agent actions 0-3 are sent as env actions 1-4 and agent actions
        # 4-7 as env actions 9-12 (presumably push and pull moves; confirm
        # against the env's action table).
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1) 
        else:
            next_state, reward, done, _ = sok.step(action+5)         

        next_state = process_frame(next_state)        
        # NOTE: this is a reference to the env's grid, not a copy; it is
        # copied into `room_state` further down.
        next_room_state = sok.room_state

        # Shaping reward is added only on non-terminal steps.
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)

        # The stored action is the agent's index (0-7), not the env action.
        agent.remember(state, action, reward, next_state, done)

        state = next_state.copy() 
        room_state = next_room_state.copy()                

        # Train every `replay_rate` steps within the episode.
        if (step+1) % agent.replay_rate == 0:
            agent.replay()            

        if done:   
            steps_per_episode.append(step+1)

            # A 3 in the grid is treated as "solved"; the most recent
            # transitions (at most `prioritized_replay_batch`, at most the
            # episode length) are then copied to the prioritized buffer.
            if 3 in sok.room_state:                
                print("SOLVED! Episode %d Steps: %d Epsilon %.4f" % (e, step+1, agent.epsilon)) 
                agent.copy_to_prioritized_buffer(min(agent.prioritized_replay_batch, step+1))

            #agent.save("exp1_episode%d.h5" % (e))            
            break

Episode: 0
Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
SOLVED! Episode 5 Steps: 47 Epsilon 0.7629
Episode: 6
Episode: 7
SOLVED! Episode 7 Steps: 9 Epsilon 0.7256
Episode: 8
Episode: 9
Episode: 10
SOLVED! Episode 10 Steps: 63 Epsilon 0.6369
Episode: 11
Episode: 12
Episode: 13
SOLVED! Episode 13 Steps: 5 Epsilon 0.5762
Episode: 14
Episode: 15
Episode: 16
Episode: 17
Episode: 18
SOLVED! Episode 18 Steps: 28 Epsilon 0.4668
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
SOLVED! Episode 24 Steps: 51 Epsilon 0.3543
Episode: 25
Episode: 26
SOLVED! Episode 26 Steps: 1 Epsilon 0.3370
Episode: 27
Episode: 28
SOLVED! Episode 28 Steps: 3 Epsilon 0.3205
Episode: 29
SOLVED! Episode 29 Steps: 1 Epsilon 0.3205
Episode: 30
Episode: 31
SOLVED! Episode 31 Steps: 73 Epsilon 0.2988
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
Episode: 42
SOLVED! Episode 42 Steps: 95 Epsilon 0.2988
Episode: 43
Episo

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 7
defaultdict(<class 'int'>, {1: 5, 3: 2})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 17
defaultdict(<class 'int'>, {3: 2, 7: 5, 6: 2, 4: 2, 2: 1, 1: 1, 13: 2, 11: 1, 12: 1})
******************************
Episode: 99
SOLVED! Episode 99 Steps: 3 Epsilon 0.2988
Episode: 100
Episode: 101
Episode: 102
SOLVED! Episode 102 Steps: 42 Epsilon 0.2988
Episode: 103
Episode: 104
Episode: 105
Episode: 106
Episode: 107
Episode: 108
Episode: 109
Episode: 110
Episode: 111
Episode: 112
Episode: 113
Episode: 114
SOLVED! Episode 114 Steps: 1 Epsilon 0.2988
Episode: 115
Episode: 116
SOLVED! Episode 116 Steps: 3 Epsilon 0.2988
Episode: 117
Episode: 118
SOLVED! Episode 118 Steps: 1 Epsilon 0.2988
Episode: 119
Episode: 120
SOLVED! Episode 120 Steps: 3 Epsilon 0.2988
Episode: 121
Episode: 122
Episode: 123
Episode: 124
SOLVED! Episode 124 Steps: 3 Epsilon 0.2988
Episode: 125
SOLVED! Episode 125 Steps: 32 Epsilon 0.2988
Episode: 126
Episode: 127
SOLVED! Episode 127 Steps: 2 Epsilon 0.2988
Episode: 128
Episode: 129

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 12, 1: 10, 2: 8})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 34
defaultdict(<class 'int'>, {4: 1, 3: 4, 2: 8, 1: 9, 10: 1, 15: 2, 7: 1, 8: 1, 5: 5, 9: 2})
******************************
Episode: 199
Episode: 200
Episode: 201
Episode: 202
Episode: 203
Episode: 204
Episode: 205
Episode: 206
Episode: 207
Episode: 208
Episode: 209
Episode: 210
Episode: 211
Episode: 212
SOLVED! Episode 212 Steps: 33 Epsilon 0.2988
Episode: 213
SOLVED! Episode 213 Steps: 14 Epsilon 0.2988
Episode: 214
SOLVED! Episode 214 Steps: 3 Epsilon 0.2988
Episode: 215
Episode: 216
SOLVED! Episode 216 Steps: 3 Epsilon 0.2988
Episode: 217
SOLVED! Episode 217 Steps: 17 Epsilon 0.2988
Episode: 218
Episode: 219
SOLVED! Episode 219 Steps: 79 Epsilon 0.2988
Episode: 220
SOLVED! Episode 220 Steps: 6 Epsilon 0.2988
Episode: 221
Episode: 222
SOLVED! Episode 222 Steps: 88 Epsilon 0.2988
Episode: 223
SOLVED! Episode 223 Steps: 1 Epsilon 0.2988
Episode: 224
Episode: 225
Episode: 226
Episode: 227
Episode: 228
Ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 34
defaultdict(<class 'int'>, {3: 16, 1: 9, 2: 9})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 38
defaultdict(<class 'int'>, {3: 9, 4: 6, 1: 6, 12: 1, 2: 8, 7: 1, 5: 3, 8: 1, 16: 1, 20: 1, 11: 1})
******************************
Episode: 299
SOLVED! Episode 299 Steps: 3 Epsilon 0.2988
Episode: 300
SOLVED! Episode 300 Steps: 1 Epsilon 0.2988
Episode: 301
Episode: 302
SOLVED! Episode 302 Steps: 2 Epsilon 0.2988
Episode: 303
Episode: 304
Episode: 305
SOLVED! Episode 305 Steps: 2 Epsilon 0.2988
Episode: 306
Episode: 307
SOLVED! Episode 307 Steps: 32 Epsilon 0.2988
Episode: 308
Episode: 309
Episode: 310
Episode: 311
Episode: 312
Episode: 313
SOLVED! Episode 313 Steps: 92 Epsilon 0.2988
Episode: 314
Episode: 315
Episode: 316
Episode: 317
Episode: 318
SOLVED! Episode 318 Steps: 2 Epsilon 0.2988
Episode: 319
Episode: 320
SOLVED! Episode 320 Steps: 43 Epsilon 0.2988
Episode: 321
Episode: 322
SOLVED! Episode 322 Steps: 14 Epsilon 0.2988
Episode: 323
Episode: 324
SOLVED! Episode 324 Steps: 1 Epsilon 0.2988
Epi

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 34
defaultdict(<class 'int'>, {3: 16, 1: 10, 2: 8})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 36
defaultdict(<class 'int'>, {3: 15, 5: 2, 1: 11, 2: 5, 4: 1, 10: 1, 9: 1})
******************************
Episode: 399
Episode: 400
SOLVED! Episode 400 Steps: 2 Epsilon 0.2988
Episode: 401
Episode: 402
Episode: 403
SOLVED! Episode 403 Steps: 3 Epsilon 0.2988
Episode: 404
Episode: 405
SOLVED! Episode 405 Steps: 1 Epsilon 0.2988
Episode: 406
SOLVED! Episode 406 Steps: 1 Epsilon 0.2988
Episode: 407
Episode: 408
Episode: 409
Episode: 410
Episode: 411
Episode: 412
Episode: 413
SOLVED! Episode 413 Steps: 3 Epsilon 0.2988
Episode: 414
Episode: 415
Episode: 416
SOLVED! Episode 416 Steps: 3 Epsilon 0.2988
Episode: 417
Episode: 418
SOLVED! Episode 418 Steps: 3 Epsilon 0.2988
Episode: 419
Episode: 420
Episode: 421
SOLVED! Episode 421 Steps: 44 Epsilon 0.2988
Episode: 422
Episode: 423
SOLVED! Episode 423 Steps: 7 Epsilon 0.2988
Episode: 424
Episode: 425
Episode: 426
Episode: 427
SOLVED! Episode 427 Steps: 3 Epsilon

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 17, 2: 6, 1: 7})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 14, 1: 5, 4: 1, 2: 6, 7: 1, 12: 1, 9: 1, 16: 1})
******************************
Episode: 499
SOLVED! Episode 499 Steps: 2 Epsilon 0.2988
Episode: 500
SOLVED! Episode 500 Steps: 2 Epsilon 0.2988
Episode: 501
Episode: 502
Episode: 503
SOLVED! Episode 503 Steps: 3 Epsilon 0.2988
Episode: 504
Episode: 505
Episode: 506
Episode: 507
Episode: 508
Episode: 509
SOLVED! Episode 509 Steps: 2 Epsilon 0.2988
Episode: 510
SOLVED! Episode 510 Steps: 32 Epsilon 0.2988
Episode: 511
Episode: 512
SOLVED! Episode 512 Steps: 1 Epsilon 0.2988
Episode: 513
Episode: 514
Episode: 515
Episode: 516
Episode: 517
SOLVED! Episode 517 Steps: 1 Epsilon 0.2988
Episode: 518
Episode: 519
SOLVED! Episode 519 Steps: 2 Epsilon 0.2988
Episode: 520
SOLVED! Episode 520 Steps: 1 Epsilon 0.2988
Episode: 521
Episode: 522
SOLVED! Episode 522 Steps: 3 Epsilon 0.2988
Episode: 523
SOLVED! Episode 523 Steps: 3 Epsilon 0

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 29
defaultdict(<class 'int'>, {3: 14, 1: 10, 2: 5})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 16, 1: 4, 2: 7, 7: 2, 9: 1})
******************************
Episode: 599
SOLVED! Episode 599 Steps: 3 Epsilon 0.2988
Episode: 600
SOLVED! Episode 600 Steps: 57 Epsilon 0.2988
Episode: 601
Episode: 602
SOLVED! Episode 602 Steps: 1 Epsilon 0.2988
Episode: 603
Episode: 604
Episode: 605
Episode: 606
Episode: 607
Episode: 608
SOLVED! Episode 608 Steps: 1 Epsilon 0.2988
Episode: 609
Episode: 610
Episode: 611
SOLVED! Episode 611 Steps: 15 Epsilon 0.2988
Episode: 612
Episode: 613
SOLVED! Episode 613 Steps: 1 Epsilon 0.2988
Episode: 614
Episode: 615
SOLVED! Episode 615 Steps: 2 Epsilon 0.2988
Episode: 616
Episode: 617
Episode: 618
Episode: 619
Episode: 620
SOLVED! Episode 620 Steps: 3 Epsilon 0.2988
Episode: 621
Episode: 622
Episode: 623
Episode: 624
Episode: 625
SOLVED! Episode 625 Steps: 2 Epsilon 0.2988
Episode: 626
Episode: 627
Episode: 628
SOLVED! Episode 628 Steps: 3 Epsilon

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 34
defaultdict(<class 'int'>, {3: 16, 1: 11, 2: 7})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 35
defaultdict(<class 'int'>, {3: 14, 1: 10, 2: 4, 7: 2, 19: 1, 4: 2, 15: 1, 10: 1})
******************************
Episode: 699
SOLVED! Episode 699 Steps: 3 Epsilon 0.2988
Episode: 700
Episode: 701
Episode: 702
SOLVED! Episode 702 Steps: 1 Epsilon 0.2988
Episode: 703
Episode: 704
Episode: 705
SOLVED! Episode 705 Steps: 35 Epsilon 0.2988
Episode: 706
Episode: 707
Episode: 708
Episode: 709
Episode: 710
SOLVED! Episode 710 Steps: 1 Epsilon 0.2988
Episode: 711
SOLVED! Episode 711 Steps: 3 Epsilon 0.2988
Episode: 712
Episode: 713
Episode: 714
Episode: 715
Episode: 716
Episode: 717
Episode: 718
SOLVED! Episode 718 Steps: 3 Epsilon 0.2988
Episode: 719
Episode: 720
Episode: 721
SOLVED! Episode 721 Steps: 35 Epsilon 0.2988
Episode: 722
Episode: 723
Episode: 724
Episode: 725
SOLVED! Episode 725 Steps: 3 Epsilon 0.2988
Episode: 726
SOLVED! Episode 726 Steps: 65 Epsilon 0.2988
Episode: 727
Episode: 728
SOLVED! Episo

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 25
defaultdict(<class 'int'>, {3: 13, 1: 10, 2: 2})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 31
defaultdict(<class 'int'>, {3: 6, 4: 4, 1: 8, 11: 2, 2: 5, 7: 2, 5: 1, 10: 1, 15: 1, 12: 1})
******************************
Episode: 799
Episode: 800
Episode: 801
SOLVED! Episode 801 Steps: 59 Epsilon 0.2988
Episode: 802
Episode: 803
SOLVED! Episode 803 Steps: 23 Epsilon 0.2988
Episode: 804
Episode: 805
Episode: 806
SOLVED! Episode 806 Steps: 3 Epsilon 0.2988
Episode: 807
Episode: 808
SOLVED! Episode 808 Steps: 11 Epsilon 0.2988
Episode: 809
Episode: 810
SOLVED! Episode 810 Steps: 3 Epsilon 0.2988
Episode: 811
Episode: 812
Episode: 813
SOLVED! Episode 813 Steps: 42 Epsilon 0.2988
Episode: 814
Episode: 815
Episode: 816
Episode: 817
Episode: 818
SOLVED! Episode 818 Steps: 72 Epsilon 0.2988
Episode: 819
Episode: 820
Episode: 821
Episode: 822
Episode: 823
SOLVED! Episode 823 Steps: 66 Epsilon 0.2988
Episode: 824
Episode: 825
SOLVED! Episode 825 Steps: 42 Epsilon 0.2988
Episode: 826
SOLVED! Episode 826 Step

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 27
defaultdict(<class 'int'>, {3: 12, 1: 11, 2: 4})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 10, 4: 3, 1: 5, 13: 1, 2: 5, 15: 1, 17: 1, 7: 1, 9: 2, 5: 1})
******************************
Episode: 899
Episode: 900
Episode: 901
Episode: 902
SOLVED! Episode 902 Steps: 3 Epsilon 0.2988
Episode: 903
SOLVED! Episode 903 Steps: 62 Epsilon 0.2988
Episode: 904
Episode: 905
Episode: 906
Episode: 907
Episode: 908
Episode: 909
Episode: 910
Episode: 911
SOLVED! Episode 911 Steps: 2 Epsilon 0.2988
Episode: 912
Episode: 913
Episode: 914
Episode: 915
Episode: 916
Episode: 917
Episode: 918
Episode: 919
Episode: 920
SOLVED! Episode 920 Steps: 2 Epsilon 0.2988
Episode: 921
SOLVED! Episode 921 Steps: 3 Epsilon 0.2988
Episode: 922
Episode: 923
Episode: 924
Episode: 925
Episode: 926
SOLVED! Episode 926 Steps: 1 Epsilon 0.2988
Episode: 927
Episode: 928
Episode: 929
Episode: 930
Episode: 931
Episode: 932
Episode: 933
Episode: 934
SOLVED! Episode 934 Steps: 2 Epsilon 0.2988
Episode: 935
E

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 25
defaultdict(<class 'int'>, {3: 12, 1: 7, 2: 6})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 36
defaultdict(<class 'int'>, {3: 11, 4: 6, 2: 8, 1: 6, 9: 1, 5: 1, 18: 1, 12: 1, 6: 1})
******************************
Episode: 999
SOLVED! Episode 999 Steps: 3 Epsilon 0.2988
Episode: 1000
SOLVED! Episode 1000 Steps: 3 Epsilon 0.2988
Episode: 1001
SOLVED! Episode 1001 Steps: 3 Epsilon 0.2988
Episode: 1002
Episode: 1003
Episode: 1004
Episode: 1005
Episode: 1006
Episode: 1007
Episode: 1008
SOLVED! Episode 1008 Steps: 79 Epsilon 0.2988
Episode: 1009
Episode: 1010
SOLVED! Episode 1010 Steps: 2 Epsilon 0.2988
Episode: 1011
Episode: 1012
Episode: 1013
Episode: 1014
SOLVED! Episode 1014 Steps: 26 Epsilon 0.2988
Episode: 1015
Episode: 1016
SOLVED! Episode 1016 Steps: 3 Epsilon 0.2988
Episode: 1017
SOLVED! Episode 1017 Steps: 2 Epsilon 0.2988
Episode: 1018
Episode: 1019
Episode: 1020
Episode: 1021
Episode: 1022
Episode: 1023
Episode: 1024
Episode: 1025
SOLVED! Episode 1025 Steps: 30 Epsilon 0.2988
Episode: 1026


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 26
defaultdict(<class 'int'>, {3: 12, 2: 7, 1: 7})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 34
defaultdict(<class 'int'>, {3: 11, 4: 4, 2: 6, 12: 2, 7: 1, 1: 4, 20: 1, 14: 1, 6: 1, 5: 1, 15: 1, 8: 1})
******************************
Episode: 1099
Episode: 1100
Episode: 1101
SOLVED! Episode 1101 Steps: 1 Epsilon 0.2988
Episode: 1102
Episode: 1103
Episode: 1104
Episode: 1105
SOLVED! Episode 1105 Steps: 3 Epsilon 0.2988
Episode: 1106
Episode: 1107
Episode: 1108
SOLVED! Episode 1108 Steps: 3 Epsilon 0.2988
Episode: 1109
Episode: 1110
SOLVED! Episode 1110 Steps: 3 Epsilon 0.2988
Episode: 1111
Episode: 1112
SOLVED! Episode 1112 Steps: 3 Epsilon 0.2988
Episode: 1113
Episode: 1114
Episode: 1115
Episode: 1116
SOLVED! Episode 1116 Steps: 28 Epsilon 0.2988
Episode: 1117
Episode: 1118
SOLVED! Episode 1118 Steps: 14 Epsilon 0.2988
Episode: 1119
SOLVED! Episode 1119 Steps: 3 Epsilon 0.2988
Episode: 1120
SOLVED! Episode 1120 Steps: 1 Epsilon 0.2988
Episode: 1121
Episode: 1122
Episode: 1123
Episode: 1124
Episode

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 24
defaultdict(<class 'int'>, {3: 11, 2: 4, 1: 9})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 32
defaultdict(<class 'int'>, {3: 10, 12: 1, 7: 4, 1: 5, 2: 5, 4: 2, 5: 1, 6: 1, 9: 3})
******************************
Episode: 1199
Episode: 1200
Episode: 1201
Episode: 1202
Episode: 1203
Episode: 1204
Episode: 1205
SOLVED! Episode 1205 Steps: 1 Epsilon 0.2988
Episode: 1206
Episode: 1207
Episode: 1208
Episode: 1209
Episode: 1210
SOLVED! Episode 1210 Steps: 28 Epsilon 0.2988
Episode: 1211
SOLVED! Episode 1211 Steps: 3 Epsilon 0.2988
Episode: 1212
SOLVED! Episode 1212 Steps: 1 Epsilon 0.2988
Episode: 1213
Episode: 1214
Episode: 1215
Episode: 1216
Episode: 1217
Episode: 1218
SOLVED! Episode 1218 Steps: 7 Epsilon 0.2988
Episode: 1219
SOLVED! Episode 1219 Steps: 13 Epsilon 0.2988
Episode: 1220
Episode: 1221
SOLVED! Episode 1221 Steps: 2 Epsilon 0.2988
Episode: 1222
SOLVED! Episode 1222 Steps: 36 Epsilon 0.2988
Episode: 1223
Episode: 1224
Episode: 1225
SOLVED! Episode 1225 Steps: 1 Epsilon 0.2988
Episode: 1226

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 17, 1: 8, 2: 5})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 31
defaultdict(<class 'int'>, {3: 13, 1: 4, 5: 3, 7: 3, 2: 4, 9: 1, 10: 1, 4: 1, 12: 1})
******************************
Episode: 1299
SOLVED! Episode 1299 Steps: 93 Epsilon 0.2988
Episode: 1300
Episode: 1301
Episode: 1302
Episode: 1303
Episode: 1304
Episode: 1305
Episode: 1306
SOLVED! Episode 1306 Steps: 3 Epsilon 0.2988
Episode: 1307
Episode: 1308
Episode: 1309
Episode: 1310
SOLVED! Episode 1310 Steps: 3 Epsilon 0.2988
Episode: 1311
Episode: 1312
Episode: 1313
SOLVED! Episode 1313 Steps: 1 Epsilon 0.2988
Episode: 1314
Episode: 1315
Episode: 1316
Episode: 1317
Episode: 1318
SOLVED! Episode 1318 Steps: 3 Epsilon 0.2988
Episode: 1319
SOLVED! Episode 1319 Steps: 2 Epsilon 0.2988
Episode: 1320
Episode: 1321
Episode: 1322
Episode: 1323
Episode: 1324
Episode: 1325
SOLVED! Episode 1325 Steps: 2 Epsilon 0.2988
Episode: 1326
Episode: 1327
SOLVED! Episode 1327 Steps: 3 Epsilon 0.2988
Episode: 1328
Episode: 1329
Epi

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 23
defaultdict(<class 'int'>, {3: 13, 1: 8, 2: 2})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 32
defaultdict(<class 'int'>, {3: 13, 1: 8, 7: 2, 2: 6, 12: 1, 5: 1, 4: 1})
******************************
Episode: 1399
SOLVED! Episode 1399 Steps: 28 Epsilon 0.2988
Episode: 1400
SOLVED! Episode 1400 Steps: 1 Epsilon 0.2988
Episode: 1401
SOLVED! Episode 1401 Steps: 8 Epsilon 0.2988
Episode: 1402
SOLVED! Episode 1402 Steps: 3 Epsilon 0.2988
Episode: 1403
SOLVED! Episode 1403 Steps: 2 Epsilon 0.2988
Episode: 1404
Episode: 1405
SOLVED! Episode 1405 Steps: 3 Epsilon 0.2988
Episode: 1406
Episode: 1407
Episode: 1408
Episode: 1409
Episode: 1410
SOLVED! Episode 1410 Steps: 93 Epsilon 0.2988
Episode: 1411
Episode: 1412
SOLVED! Episode 1412 Steps: 1 Epsilon 0.2988
Episode: 1413
Episode: 1414
Episode: 1415
Episode: 1416
Episode: 1417
SOLVED! Episode 1417 Steps: 2 Epsilon 0.2988
Episode: 1418
Episode: 1419
SOLVED! Episode 1419 Steps: 33 Epsilon 0.2988
Episode: 1420
Episode: 1421
SOLVED! Episode 1421 Steps: 2 Epsilo

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 29
defaultdict(<class 'int'>, {3: 13, 2: 6, 1: 10})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 31
defaultdict(<class 'int'>, {3: 12, 1: 9, 4: 2, 2: 6, 9: 1, 7: 1})
******************************
Episode: 1499
Episode: 1500
SOLVED! Episode 1500 Steps: 3 Epsilon 0.2988
Episode: 1501
Episode: 1502
Episode: 1503
Episode: 1504
SOLVED! Episode 1504 Steps: 2 Epsilon 0.2988
Episode: 1505
SOLVED! Episode 1505 Steps: 3 Epsilon 0.2988
Episode: 1506
Episode: 1507
Episode: 1508
SOLVED! Episode 1508 Steps: 3 Epsilon 0.2988
Episode: 1509
Episode: 1510
SOLVED! Episode 1510 Steps: 30 Epsilon 0.2988
Episode: 1511
Episode: 1512
Episode: 1513
Episode: 1514
SOLVED! Episode 1514 Steps: 1 Epsilon 0.2988
Episode: 1515
Episode: 1516
Episode: 1517
SOLVED! Episode 1517 Steps: 3 Epsilon 0.2988
Episode: 1518
Episode: 1519
SOLVED! Episode 1519 Steps: 2 Epsilon 0.2988
Episode: 1520
SOLVED! Episode 1520 Steps: 3 Epsilon 0.2988
Episode: 1521
Episode: 1522
Episode: 1523
Episode: 1524
SOLVED! Episode 1524 Steps: 56 Epsilon 0.2988
Ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 31
defaultdict(<class 'int'>, {3: 16, 1: 9, 2: 6})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 32
defaultdict(<class 'int'>, {4: 2, 6: 1, 1: 7, 3: 9, 2: 7, 9: 3, 5: 3})
******************************
Episode: 1599
Episode: 1600
SOLVED! Episode 1600 Steps: 2 Epsilon 0.2988
Episode: 1601
SOLVED! Episode 1601 Steps: 1 Epsilon 0.2988
Episode: 1602
SOLVED! Episode 1602 Steps: 3 Epsilon 0.2988
Episode: 1603
SOLVED! Episode 1603 Steps: 3 Epsilon 0.2988
Episode: 1604
Episode: 1605
SOLVED! Episode 1605 Steps: 29 Epsilon 0.2988
Episode: 1606
Episode: 1607
SOLVED! Episode 1607 Steps: 22 Epsilon 0.2988
Episode: 1608
Episode: 1609
SOLVED! Episode 1609 Steps: 3 Epsilon 0.2988
Episode: 1610
Episode: 1611
Episode: 1612
SOLVED! Episode 1612 Steps: 1 Epsilon 0.2988
Episode: 1613
Episode: 1614
SOLVED! Episode 1614 Steps: 3 Epsilon 0.2988
Episode: 1615
Episode: 1616
Episode: 1617
Episode: 1618
SOLVED! Episode 1618 Steps: 3 Epsilon 0.2988
Episode: 1619
SOLVED! Episode 1619 Steps: 1 Epsilon 0.2988
Episode: 1620
Episode:

KeyboardInterrupt: 