# Final Project - Reinforcement Learning
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time

import numpy as np
import scipy as scp
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten

from tqdm.notebook import tqdm
from collections import defaultdict

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embed an mp4 file in the notebook as an inline HTML5 video player.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object wrapping a ``<video>`` tag whose
        source is the file content encoded as a base64 data URI.
    """
    # Read through a context manager so the file handle is closed right away
    # instead of lingering until garbage collection.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    """Compute the walking distance from the target cell to every reachable cell.

    A breadth-first search starts at the target (cell value 2) and expands to
    the four orthogonal neighbours. Cells with value 0 are impassable; every
    other value is treated as walkable.

    Args:
        room_state: 2-D numpy array encoding the room.

    Returns:
        A float array with the same shape as ``room_state``. Each reachable
        cell holds its number of steps from the target; the target itself,
        impassable cells and unreachable cells hold 0.

    Raises:
        ValueError: If ``room_state`` contains no cell with value 2.
    """
    target_cells = np.argwhere(room_state == 2)
    if len(target_cells) == 0:
        raise ValueError("room_state contains no target cell (value 2)")
    # With several targets, keep the last one in row-major order (the same
    # cell a full scan that overwrites its result would end up with).
    target = (int(target_cells[-1][0]), int(target_cells[-1][1]))

    n_rows, n_cols = room_state.shape[0], room_state.shape[1]
    distances = np.zeros(shape=room_state.shape)
    visited_cells = {target}
    cell_queue = deque([target])

    while cell_queue:
        row, col = cell_queue.popleft()
        next_distance = distances[row][col] + 1
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            next_cell = (row + d_row, col + d_col)
            # Stay inside the grid: without this check a room lacking a wall
            # border would raise IndexError or wrap around via negative indices.
            if not (0 <= next_cell[0] < n_rows and 0 <= next_cell[1] < n_cols):
                continue
            if room_state[next_cell] == 0 or next_cell in visited_cells:
                continue
            distances[next_cell] = next_distance
            visited_cells.add(next_cell)
            cell_queue.append(next_cell)

    return distances

def calc_distances(room_state, distances):
    """Locate the mover and the box and look up the box's distance value.

    Args:
        room_state: 2-D numpy array encoding the room. Value 5 marks the
            mover and value 4 marks the box. If no 4 is present, value 3 is
            used instead (the value this notebook's solved-check looks for,
            i.e. the box resting on the target).
        distances: Array indexed like ``room_state``, e.g. the result of
            ``get_distances``.

    Returns:
        A tuple ``(mover, box, box_distance)`` where ``mover`` and ``box`` are
        ``(row, col)`` tuples and ``box_distance`` is ``distances`` at the box
        cell. ``mover`` is None when no cell has value 5. When a value occurs
        more than once, the last occurrence in row-major order is used.

    Raises:
        ValueError: If ``room_state`` contains no box (neither 4 nor 3).
    """
    def _last_cell(value):
        # Last matching cell in row-major order, or None when absent.
        cells = np.argwhere(room_state == value)
        if len(cells) == 0:
            return None
        return (int(cells[-1][0]), int(cells[-1][1]))

    mover = _last_cell(5)
    box = _last_cell(4)
    if box is None:
        # No free-standing box: fall back to a box that sits on the target.
        box = _last_cell(3)
    if box is None:
        raise ValueError("room_state contains no box (value 4 or 3)")

    return mover, box, distances[box[0]][box[1]]

def box2target_change_reward(room_state, next_room_state, distances):
    """Shaping reward for a single transition between two room states.

    Args:
        room_state: Room array before the step.
        next_room_state: Room array after the step.
        distances: Distance map passed through to ``calc_distances``.

    Returns:
        -1.0 when the step left the room unchanged. Otherwise the sum of
        a box term (+5.0 if the box's distance value dropped, -5.0 if it
        grew) and a mover term (+1.0 if the mover got closer to the box from
        a straight-line distance of at least 2, -1.0 if it moved away and
        ended at a distance of at least 2).
    """
    # A step that changes nothing is penalised outright.
    if np.array_equal(room_state, next_room_state):
        return -1.0

    def straight_line(cell_a, cell_b):
        # Euclidean distance between two (row, col) cells.
        return np.sqrt((cell_a[0]-cell_b[0])**2 + (cell_a[1]-cell_b[1])**2)

    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)

    # Box term: did the box's distance value shrink or grow?
    if n_t2b < t2b:
        box_term = 5.0
    elif n_t2b > t2b:
        box_term = -5.0
    else:
        box_term = 0.0

    m2b = straight_line(mover, box)
    n_m2b = straight_line(n_mover, n_box)

    # Mover term: only counts while the mover is at least two cells away.
    if n_m2b < m2b and m2b >= 2:
        mover_term = 1.0
    elif n_m2b > m2b and n_m2b >= 2:
        mover_term = -1.0
    else:
        mover_term = 0.0

    return box_term + mover_term

# Solution

In [8]:
class SOK_Agent:
    """DQN agent with a target network, Double-DQN targets and two replay buffers.

    The online network (``model``) picks actions and is trained on sampled
    transitions; ``target_model`` is a slowly tracking copy used to evaluate
    bootstrap values. Besides the ordinary replay buffer there is a second,
    smaller buffer that callers fill via ``copy_to_prioritized_buffer``; once
    it is large enough, half of every training batch is drawn from it.
    """

    def __init__(self):
        """Build both networks, the replay buffers and the hyperparameters."""
        # Construct DQN models
        self.state_size = (5, 5, 1)   # network input shape (rows, cols, channels)
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8

        # Replay buffers
        self.replay_buffer = deque(maxlen=5000)
        self.prioritized_replay_buffer = deque(maxlen=500)
        self.prioritized_replay_batch = 50

        # Hyperparameters
        self.gamma = 0.9             # discount factor
        self.epsilon = 1.0           # current exploration rate
        self.epsilon_min = 0.3       # epsilon stops decaying once it is not above this
        self.epsilon_decay = 0.995   # multiplicative decay applied per replay() call
        self.replay_rate = 10        # read by the training loop: steps between replay() calls
        self.update_beta = 0.999     # weight kept from the old target network per soft update

        self.verbosity = 100

    def _build_model(self):
        """Create and compile the Q-network (two conv layers, two dense layers)."""
        model = Sequential()
        model.add(Conv2D(32, (3, 3), input_shape=self.state_size, activation='relu'))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(Flatten())
        model.add(Dense(32, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the ordinary replay buffer."""
        self.replay_buffer.append([state, action, reward, next_state, done])

    def copy_to_prioritized_buffer(self, n):
        """Copy the ``n`` most recent transitions into the second buffer.

        Transitions are appended newest first. ``n`` is clamped to the number
        of stored transitions so an over-large request cannot raise
        IndexError.
        """
        n = min(n, len(self.replay_buffer))
        for i in range(n):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])

    def act(self, state, stochastic=False):
        """Choose an action for ``state``.

        Args:
            state: Observation holding one 5x5 grid (any array that reshapes
                to ``(1,) + state_size``).
            stochastic: If True, sample the action from a softmax over the
                Q-values instead of taking the argmax.

        Returns:
            The chosen action index in ``range(action_size)``. With
            probability ``epsilon`` a uniformly random action is returned.
        """
        # Strict '<' so that epsilon == 0 means fully greedy.
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)

        # Feed the network the input shape it was declared with.
        net_input = np.reshape(state, (-1,) + tuple(self.state_size))
        act_values = self.model.predict(net_input, verbose=0)[0]

        if stochastic:
            # Max-shifted softmax in float64: exp() of raw Q-values can
            # overflow to inf, producing NaN probabilities.
            shifted = np.asarray(act_values, dtype=np.float64)
            shifted = shifted - shifted.max()
            exp_values = np.exp(shifted)
            act_probs = exp_values / exp_values.sum()
            return np.random.choice(np.arange(self.action_size), size=1, p=act_probs)[0]

        return np.argmax(act_values)

    def replay(self):
        """Train the online network on one sampled minibatch.

        Does nothing until the ordinary buffer holds at least ``batch_size``
        transitions. The batch is drawn entirely from the ordinary buffer
        until the second buffer holds at least ``batch_size // 2`` entries;
        after that, ``batch_size // 2`` transitions come from the second
        buffer and the rest from the ordinary one. Targets follow Double DQN:
        the online network selects the next action, the target network
        evaluates it. Afterwards the target network is soft-updated and
        epsilon is decayed.
        """
        if len(self.replay_buffer) < self.batch_size:
            return

        half = self.batch_size // 2
        if len(self.prioritized_replay_buffer) < half:
            minibatch = random.sample(self.replay_buffer, self.batch_size)
        else:
            # 'batch_size - half' keeps the batch full for odd batch sizes too.
            minibatch = random.sample(self.replay_buffer, self.batch_size - half)
            minibatch.extend(random.sample(self.prioritized_replay_buffer, half))

        batch_shape = (self.batch_size,) + tuple(self.state_size)
        states = np.zeros(batch_shape)
        next_states = np.zeros(batch_shape)
        actions = np.zeros(self.batch_size, dtype=int)
        rewards = np.zeros(self.batch_size)
        dones = np.zeros(self.batch_size, dtype=bool)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            # Stored observations carry a leading batch axis; reshape them to
            # the network's declared input shape.
            states[i] = np.reshape(state, self.state_size)
            actions[i] = action
            rewards[i] = reward
            next_states[i] = np.reshape(next_state, self.state_size)
            dones[i] = bool(done)

        # Start from the current predictions so that only the taken action's
        # Q-value contributes to the loss.
        targets = self.model.predict(states, verbose=0)
        max_actions = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_rewards = self.target_model.predict(next_states, verbose=0)

        rows = np.arange(self.batch_size)
        bootstrapped = rewards + self.gamma * next_rewards[rows, max_actions]
        # Terminal transitions get the raw reward, without a bootstrap term.
        targets[rows, actions] = np.where(dones, rewards, bootstrapped)

        self.model.fit(states, targets, epochs=10, verbose=0)

        self.update_target_model()

        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay

    def update_target_model(self):
        """Soft-update the target network towards the online network.

        Each target weight becomes
        ``update_beta * target + (1 - update_beta) * online``.
        """
        blended = [
            self.update_beta * target_w + (1 - self.update_beta) * online_w
            for online_w, target_w in zip(self.model.get_weights(),
                                          self.target_model.get_weights())
        ]
        self.target_model.set_weights(blended)

    def load(self, name):
        """Load the online network's weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the online network's weights to the file ``name``."""
        self.model.save_weights(name)

In [9]:
def process_frame(frame):
    """Reduce a rendered frame to a 5x5 grid of values in [0, 1].

    Takes channel 0 of rows and columns 16..95 of ``frame`` (an 80x80 patch),
    splits it into a 5x5 grid of 16x16 pixel tiles, keeps the maximum of each
    tile, and divides by 255. Presumably each tile is one room cell and the
    cropped border is the outer wall ring; confirm against the environment's
    renderer.

    Args:
        frame: Image array of at least 96x96 pixels with a channel axis last.

    Returns:
        Array of shape (1, 5, 5).
    """
    patch = frame[16:96, 16:96, 0]
    tiles = patch.reshape(5, 16, 5, 16)
    # Max over the within-tile column axis, then the within-tile row axis.
    grid = tiles.max(axis=3).max(axis=1)
    scaled = grid / 255
    return scaled[np.newaxis, ...]

## Training

#### Test Suite

In [10]:
def test_agent(stochastic=False):
    """Evaluate the global ``agent`` on 100 seeded puzzles and print a summary.

    Exploration is switched off (epsilon = 0) for the duration of the test
    and restored afterwards, even if the evaluation is interrupted or
    raises. Each puzzle is limited to 20 steps and counts as solved when the
    final room state contains the value 3.

    Args:
        stochastic: Passed to ``agent.act``; if True, actions are sampled
            from a softmax over the Q-values instead of taken greedily.
    """
    current_epsilon = agent.epsilon
    agent.epsilon = 0.0
    num_solved = 0
    solved_in_steps = defaultdict(int)

    try:
        for t in tqdm(range(100)):
            # Seed per puzzle index, presumably so that every evaluation
            # sees the same 100 rooms (verify the env uses `random`).
            random.seed(t)
            sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
            sok.set_maxsteps(20)
            steps = 0

            state = sok.get_image('rgb_array')
            done = False
            while not done:
                steps += 1
                action = agent.act(process_frame(state), stochastic)
                # Agent actions 0-3 map to env actions 1-4, and 4-7 to 9-12.
                if action < 4:
                    action += 1
                else:
                    action += 5
                state, reward, done, info = sok.step(action)

            if 3 in sok.room_state:
                num_solved += 1
                solved_in_steps[steps] += 1
    finally:
        # Always hand the agent back with its exploration rate intact;
        # otherwise an interrupted test would leave training fully greedy.
        agent.epsilon = current_epsilon

    print("*" * 30)
    print("Stochastic" if stochastic else "Deterministic")
    print("*" * 30)
    print("Solved: %d" % num_solved)
    print("=" * 30)
    print(solved_in_steps)
    print("*" * 30)

In [11]:
# Training budget: number of episodes, and the step limit of each episode.
max_episodes = 5000
max_steps = 100

def init_sok(r):
    """Create a seeded 7x7 single-box environment limited to ``max_steps`` steps.

    Args:
        r: Episode index; ``random`` is seeded with ``r + 100`` before the
            environment is constructed.

    Returns:
        The new ``PushAndPullSokobanEnv`` instance.
    """
    random.seed(r + 100)
    env = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    env.set_maxsteps(max_steps)
    return env

In [None]:
agent = SOK_Agent()

# Number of steps each finished episode took.
steps_per_episode = []

for e in range(max_episodes):
    # Every 100 episodes, evaluate the current policy: greedy first, then sampled.
    if e % 100 == 0:
        for sample_actions in (False, True):
            test_agent(stochastic=sample_actions)

    print("Episode: %d" % (e))

    sok = init_sok(e)
    random.seed(e)

    state = process_frame(sok.get_image('rgb_array'))
    room_state = sok.room_state.copy()
    # The distance map is computed once per episode from the initial room.
    distances = get_distances(room_state)

    for step in range(sok.max_steps):
        action = agent.act(state)
        # Agent actions 0-3 map to env actions 1-4, and 4-7 to 9-12.
        env_action = action + 1 if action < 4 else action + 5
        next_state, reward, done, _ = sok.step(env_action)

        next_state = process_frame(next_state)
        next_room_state = sok.room_state

        # Add the shaping reward on non-terminal steps only.
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)

        agent.remember(state, action, reward, next_state, done)

        state = next_state.copy()
        room_state = next_room_state.copy()

        # Train every `replay_rate` steps within the episode.
        if (step+1) % agent.replay_rate == 0:
            agent.replay()

        if not done:
            continue

        # Episode finished: record its length; on success keep its most
        # recent transitions in the agent's second replay buffer.
        steps_per_episode.append(step+1)

        if 3 in sok.room_state:
            print("SOLVED! Episode %d Steps: %d Epsilon %.4f" % (e, step+1, agent.epsilon))
            agent.copy_to_prioritized_buffer(min(agent.prioritized_replay_batch, step+1))

        # Optional checkpoint: agent.save("exp1_episode%d.h5" % (e))
        break

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 5
defaultdict(<class 'int'>, {2: 2, 1: 1, 3: 2})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 12
defaultdict(<class 'int'>, {15: 1, 6: 4, 3: 3, 14: 2, 4: 1, 8: 1})
******************************
Episode: 0
Episode: 1
Episode: 2
Episode: 3
Episode: 4
SOLVED! Episode 4 Steps: 95 Epsilon 0.7822
Episode: 5
Episode: 6
Episode: 7
SOLVED! Episode 7 Steps: 9 Epsilon 0.7076
Episode: 8
Episode: 9
Episode: 10
Episode: 11
Episode: 12
Episode: 13
Episode: 14
SOLVED! Episode 14 Steps: 78 Epsilon 0.5058
Episode: 15
Episode: 16
Episode: 17
Episode: 18
Episode: 19
SOLVED! Episode 19 Steps: 3 Epsilon 0.4139
Episode: 20
Episode: 21
Episode: 22
SOLVED! Episode 22 Steps: 1 Epsilon 0.3744
Episode: 23
Episode: 24
SOLVED! Episode 24 Steps: 35 Epsilon 0.3508
Episode: 25
Episode: 26
Episode: 27
Episode: 28
SOLVED! Episode 28 Steps: 3 Epsilon 0.3018
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
SOLVED! Episode 33 Steps: 100 Epsilon 0.2988
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 12
defaultdict(<class 'int'>, {1: 8, 2: 2, 3: 2})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 17
defaultdict(<class 'int'>, {6: 4, 2: 1, 4: 3, 3: 2, 11: 1, 1: 3, 13: 1, 9: 1, 16: 1})
******************************
Episode: 100
Episode: 101
Episode: 102
Episode: 103
Episode: 104
Episode: 105
Episode: 106
Episode: 107
Episode: 108
Episode: 109
Episode: 110
Episode: 111
Episode: 112
Episode: 113
SOLVED! Episode 113 Steps: 1 Epsilon 0.2988
Episode: 114
SOLVED! Episode 114 Steps: 1 Epsilon 0.2988
Episode: 115
Episode: 116
SOLVED! Episode 116 Steps: 3 Epsilon 0.2988
Episode: 117
Episode: 118
Episode: 119
Episode: 120
Episode: 121
Episode: 122
Episode: 123
SOLVED! Episode 123 Steps: 3 Epsilon 0.2988
Episode: 124
Episode: 125
SOLVED! Episode 125 Steps: 2 Epsilon 0.2988
Episode: 126
Episode: 127
SOLVED! Episode 127 Steps: 45 Epsilon 0.2988
Episode: 128
Episode: 129
SOLVED! Episode 129 Steps: 45 Epsilon 0.2988
Episode: 130
Episode: 131
Episode: 132
Episode: 133
SOLVED! Episode 133 Steps: 3 Epsilon 0.2988
Ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 24
defaultdict(<class 'int'>, {3: 9, 2: 5, 1: 10})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 25
defaultdict(<class 'int'>, {3: 8, 10: 1, 7: 1, 2: 3, 1: 8, 14: 1, 13: 1, 8: 1, 9: 1})
******************************
Episode: 200
Episode: 201
Episode: 202
Episode: 203
Episode: 204
SOLVED! Episode 204 Steps: 98 Epsilon 0.2988
Episode: 205
Episode: 206
Episode: 207
SOLVED! Episode 207 Steps: 3 Epsilon 0.2988
Episode: 208
Episode: 209
SOLVED! Episode 209 Steps: 2 Epsilon 0.2988
Episode: 210
Episode: 211
Episode: 212
Episode: 213
SOLVED! Episode 213 Steps: 3 Epsilon 0.2988
Episode: 214
SOLVED! Episode 214 Steps: 3 Epsilon 0.2988
Episode: 215
Episode: 216
Episode: 217
SOLVED! Episode 217 Steps: 3 Epsilon 0.2988
Episode: 218
SOLVED! Episode 218 Steps: 59 Epsilon 0.2988
Episode: 219
SOLVED! Episode 219 Steps: 55 Epsilon 0.2988
Episode: 220
Episode: 221
Episode: 222
Episode: 223
Episode: 224
Episode: 225
Episode: 226
Episode: 227
Episode: 228
Episode: 229
Episode: 230
SOLVED! Episode 230 Steps: 25 Epsilon 0.

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 23
defaultdict(<class 'int'>, {3: 9, 1: 10, 2: 4})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 25
defaultdict(<class 'int'>, {3: 9, 4: 2, 2: 2, 10: 2, 1: 5, 7: 3, 13: 1, 5: 1})
******************************
Episode: 300
SOLVED! Episode 300 Steps: 1 Epsilon 0.2988
Episode: 301
Episode: 302
Episode: 303
SOLVED! Episode 303 Steps: 24 Epsilon 0.2988
Episode: 304
Episode: 305
SOLVED! Episode 305 Steps: 48 Epsilon 0.2988
Episode: 306
Episode: 307
SOLVED! Episode 307 Steps: 32 Epsilon 0.2988
Episode: 308
Episode: 309
Episode: 310
Episode: 311
Episode: 312
Episode: 313
Episode: 314
Episode: 315
Episode: 316
Episode: 317
Episode: 318
SOLVED! Episode 318 Steps: 2 Epsilon 0.2988
Episode: 319
SOLVED! Episode 319 Steps: 3 Epsilon 0.2988
Episode: 320
Episode: 321
Episode: 322
SOLVED! Episode 322 Steps: 1 Epsilon 0.2988
Episode: 323
SOLVED! Episode 323 Steps: 3 Epsilon 0.2988
Episode: 324
SOLVED! Episode 324 Steps: 1 Epsilon 0.2988
Episode: 325
Episode: 326
SOLVED! Episode 326 Steps: 50 Epsilon 0.2988
Episode: 3

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 22
defaultdict(<class 'int'>, {3: 8, 1: 9, 2: 5})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 32
defaultdict(<class 'int'>, {3: 8, 1: 7, 10: 1, 17: 1, 2: 5, 8: 2, 11: 1, 14: 1, 4: 1, 5: 2, 12: 1, 9: 1, 6: 1})
******************************
Episode: 400
Episode: 401
Episode: 402
Episode: 403
Episode: 404
Episode: 405
SOLVED! Episode 405 Steps: 1 Epsilon 0.2988
Episode: 406
SOLVED! Episode 406 Steps: 1 Epsilon 0.2988
Episode: 407
Episode: 408
Episode: 409
Episode: 410
SOLVED! Episode 410 Steps: 2 Epsilon 0.2988
Episode: 411
SOLVED! Episode 411 Steps: 97 Epsilon 0.2988
Episode: 412
Episode: 413
Episode: 414
Episode: 415
SOLVED! Episode 415 Steps: 78 Epsilon 0.2988
Episode: 416
SOLVED! Episode 416 Steps: 3 Epsilon 0.2988
Episode: 417
Episode: 418
Episode: 419
Episode: 420
Episode: 421
Episode: 422
Episode: 423
SOLVED! Episode 423 Steps: 1 Epsilon 0.2988
Episode: 424
Episode: 425
Episode: 426
Episode: 427
Episode: 428
SOLVED! Episode 428 Steps: 2 Epsilon 0.2988
Episode: 429
SOLVED! Episode 429 Steps: 1

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 26
defaultdict(<class 'int'>, {2: 6, 1: 9, 3: 11})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 30
defaultdict(<class 'int'>, {9: 2, 11: 1, 2: 6, 1: 7, 14: 1, 3: 10, 5: 1, 7: 1, 4: 1})
******************************
Episode: 500
SOLVED! Episode 500 Steps: 2 Epsilon 0.2988
Episode: 501
Episode: 502
Episode: 503
SOLVED! Episode 503 Steps: 3 Epsilon 0.2988
Episode: 504
SOLVED! Episode 504 Steps: 91 Epsilon 0.2988
Episode: 505
Episode: 506
Episode: 507
Episode: 508
Episode: 509
Episode: 510
Episode: 511
SOLVED! Episode 511 Steps: 2 Epsilon 0.2988
Episode: 512
SOLVED! Episode 512 Steps: 1 Epsilon 0.2988
Episode: 513
Episode: 514
Episode: 515
Episode: 516
Episode: 517
SOLVED! Episode 517 Steps: 1 Epsilon 0.2988
Episode: 518
Episode: 519
SOLVED! Episode 519 Steps: 51 Epsilon 0.2988
Episode: 520
SOLVED! Episode 520 Steps: 1 Epsilon 0.2988
Episode: 521
Episode: 522
SOLVED! Episode 522 Steps: 3 Epsilon 0.2988
Episode: 523
SOLVED! Episode 523 Steps: 3 Epsilon 0.2988
Episode: 524
Episode: 525
Episode: 526
Episo

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 23
defaultdict(<class 'int'>, {3: 12, 1: 8, 2: 3})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 28
defaultdict(<class 'int'>, {3: 10, 5: 3, 11: 1, 2: 5, 1: 6, 7: 1, 15: 1, 14: 1})
******************************
Episode: 600
SOLVED! Episode 600 Steps: 3 Epsilon 0.2988
Episode: 601
Episode: 602
SOLVED! Episode 602 Steps: 1 Epsilon 0.2988
Episode: 603
Episode: 604
Episode: 605
Episode: 606
Episode: 607
Episode: 608
SOLVED! Episode 608 Steps: 18 Epsilon 0.2988
Episode: 609
Episode: 610
Episode: 611
SOLVED! Episode 611 Steps: 1 Epsilon 0.2988
Episode: 612
Episode: 613
SOLVED! Episode 613 Steps: 1 Epsilon 0.2988
Episode: 614
SOLVED! Episode 614 Steps: 62 Epsilon 0.2988
Episode: 615
SOLVED! Episode 615 Steps: 2 Epsilon 0.2988
Episode: 616
Episode: 617
Episode: 618
Episode: 619
Episode: 620
Episode: 621
Episode: 622
Episode: 623
Episode: 624
Episode: 625
SOLVED! Episode 625 Steps: 2 Epsilon 0.2988
Episode: 626
Episode: 627
Episode: 628
SOLVED! Episode 628 Steps: 3 Epsilon 0.2988
Episode: 629
Episode: 630
Ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 32
defaultdict(<class 'int'>, {3: 14, 1: 12, 2: 6})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 36
defaultdict(<class 'int'>, {3: 15, 20: 2, 6: 1, 2: 8, 4: 1, 1: 7, 12: 1, 14: 1})
******************************
Episode: 700
Episode: 701
Episode: 702
SOLVED! Episode 702 Steps: 6 Epsilon 0.2988
Episode: 703
Episode: 704
Episode: 705
SOLVED! Episode 705 Steps: 35 Epsilon 0.2988
Episode: 706
Episode: 707
Episode: 708
Episode: 709
Episode: 710
Episode: 711
SOLVED! Episode 711 Steps: 3 Epsilon 0.2988
Episode: 712
Episode: 713
Episode: 714
Episode: 715
Episode: 716
Episode: 717
Episode: 718
SOLVED! Episode 718 Steps: 3 Epsilon 0.2988
Episode: 719
Episode: 720
Episode: 721
Episode: 722
Episode: 723
Episode: 724
Episode: 725
SOLVED! Episode 725 Steps: 3 Epsilon 0.2988
Episode: 726
Episode: 727
Episode: 728
SOLVED! Episode 728 Steps: 1 Epsilon 0.2988
Episode: 729
Episode: 730
Episode: 731
Episode: 732
SOLVED! Episode 732 Steps: 3 Epsilon 0.2988
Episode: 733
SOLVED! Episode 733 Steps: 3 Epsilon 0.2988
Episode:

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 28
defaultdict(<class 'int'>, {3: 13, 1: 12, 2: 3})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 35
defaultdict(<class 'int'>, {4: 3, 1: 14, 3: 10, 5: 2, 6: 1, 2: 2, 9: 1, 7: 1, 16: 1})
******************************
Episode: 800
Episode: 801
Episode: 802
Episode: 803
Episode: 804
Episode: 805
SOLVED! Episode 805 Steps: 3 Epsilon 0.2988
Episode: 806
SOLVED! Episode 806 Steps: 3 Epsilon 0.2988
Episode: 807
Episode: 808
Episode: 809
Episode: 810
SOLVED! Episode 810 Steps: 3 Epsilon 0.2988
Episode: 811
Episode: 812
Episode: 813
SOLVED! Episode 813 Steps: 42 Epsilon 0.2988
Episode: 814
Episode: 815
Episode: 816
Episode: 817
Episode: 818
Episode: 819
Episode: 820
Episode: 821
SOLVED! Episode 821 Steps: 74 Epsilon 0.2988
Episode: 822
Episode: 823
SOLVED! Episode 823 Steps: 2 Epsilon 0.2988
Episode: 824
Episode: 825
SOLVED! Episode 825 Steps: 99 Epsilon 0.2988
Episode: 826
Episode: 827
SOLVED! Episode 827 Steps: 3 Epsilon 0.2988
Episode: 828
SOLVED! Episode 828 Steps: 2 Epsilon 0.2988
Episode: 829
Episode: 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 15, 2: 6, 1: 9})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 33
defaultdict(<class 'int'>, {3: 12, 5: 3, 11: 1, 2: 8, 10: 1, 7: 1, 1: 6, 20: 1})
******************************
Episode: 900
Episode: 901
Episode: 902
SOLVED! Episode 902 Steps: 3 Epsilon 0.2988
Episode: 903
Episode: 904
Episode: 905
Episode: 906
Episode: 907
Episode: 908
Episode: 909
SOLVED! Episode 909 Steps: 30 Epsilon 0.2988
Episode: 910
Episode: 911
SOLVED! Episode 911 Steps: 2 Epsilon 0.2988
Episode: 912
Episode: 913
Episode: 914
Episode: 915
Episode: 916
Episode: 917
Episode: 918
Episode: 919
Episode: 920
SOLVED! Episode 920 Steps: 2 Epsilon 0.2988
Episode: 921
Episode: 922
SOLVED! Episode 922 Steps: 3 Epsilon 0.2988
Episode: 923
SOLVED! Episode 923 Steps: 1 Epsilon 0.2988
Episode: 924
Episode: 925
SOLVED! Episode 925 Steps: 30 Epsilon 0.2988
Episode: 926
SOLVED! Episode 926 Steps: 7 Epsilon 0.2988
Episode: 927
Episode: 928
Episode: 929
Episode: 930
Episode: 931
Episode: 932
Episode: 933
Episode

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 29
defaultdict(<class 'int'>, {3: 13, 2: 7, 1: 9})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 35
defaultdict(<class 'int'>, {3: 13, 19: 1, 2: 7, 9: 1, 1: 8, 7: 1, 4: 2, 18: 1, 16: 1})
******************************
Episode: 1000
SOLVED! Episode 1000 Steps: 3 Epsilon 0.2988
Episode: 1001
SOLVED! Episode 1001 Steps: 3 Epsilon 0.2988
Episode: 1002
Episode: 1003
Episode: 1004
Episode: 1005
SOLVED! Episode 1005 Steps: 2 Epsilon 0.2988
Episode: 1006
Episode: 1007
Episode: 1008
SOLVED! Episode 1008 Steps: 79 Epsilon 0.2988
Episode: 1009
Episode: 1010
SOLVED! Episode 1010 Steps: 6 Epsilon 0.2988
Episode: 1011
Episode: 1012
Episode: 1013
SOLVED! Episode 1013 Steps: 3 Epsilon 0.2988
Episode: 1014
SOLVED! Episode 1014 Steps: 26 Epsilon 0.2988
Episode: 1015
Episode: 1016
SOLVED! Episode 1016 Steps: 3 Epsilon 0.2988
Episode: 1017
SOLVED! Episode 1017 Steps: 2 Epsilon 0.2988
Episode: 1018
Episode: 1019
Episode: 1020
Episode: 1021
Episode: 1022
Episode: 1023
Episode: 1024
SOLVED! Episode 1024 Steps: 45 Epsilon 0

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 25
defaultdict(<class 'int'>, {3: 13, 1: 7, 2: 5})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 27
defaultdict(<class 'int'>, {3: 10, 1: 5, 2: 5, 12: 2, 7: 1, 10: 1, 4: 1, 9: 1, 6: 1})
******************************
Episode: 1100
Episode: 1101
SOLVED! Episode 1101 Steps: 1 Epsilon 0.2988
Episode: 1102
Episode: 1103
SOLVED! Episode 1103 Steps: 2 Epsilon 0.2988
Episode: 1104
Episode: 1105
SOLVED! Episode 1105 Steps: 3 Epsilon 0.2988
Episode: 1106
Episode: 1107
Episode: 1108
SOLVED! Episode 1108 Steps: 3 Epsilon 0.2988
Episode: 1109
Episode: 1110
SOLVED! Episode 1110 Steps: 45 Epsilon 0.2988
Episode: 1111
Episode: 1112
Episode: 1113
Episode: 1114
Episode: 1115
Episode: 1116
Episode: 1117
Episode: 1118
SOLVED! Episode 1118 Steps: 14 Epsilon 0.2988
Episode: 1119
Episode: 1120
SOLVED! Episode 1120 Steps: 1 Epsilon 0.2988
Episode: 1121
SOLVED! Episode 1121 Steps: 1 Epsilon 0.2988
Episode: 1122
Episode: 1123
Episode: 1124
Episode: 1125
Episode: 1126
Episode: 1127
Episode: 1128
Episode: 1129
Episode: 1130
Ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 21
defaultdict(<class 'int'>, {3: 9, 2: 5, 1: 7})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 28
defaultdict(<class 'int'>, {3: 9, 10: 1, 16: 1, 2: 5, 7: 4, 1: 4, 5: 1, 9: 1, 14: 1, 4: 1})
******************************
Episode: 1200
SOLVED! Episode 1200 Steps: 42 Epsilon 0.2988
Episode: 1201
Episode: 1202
Episode: 1203
Episode: 1204
Episode: 1205
SOLVED! Episode 1205 Steps: 1 Epsilon 0.2988
Episode: 1206
SOLVED! Episode 1206 Steps: 52 Epsilon 0.2988
Episode: 1207
Episode: 1208
Episode: 1209
Episode: 1210
SOLVED! Episode 1210 Steps: 3 Epsilon 0.2988
Episode: 1211
Episode: 1212
SOLVED! Episode 1212 Steps: 1 Epsilon 0.2988
Episode: 1213
Episode: 1214
Episode: 1215
Episode: 1216
Episode: 1217
Episode: 1218
SOLVED! Episode 1218 Steps: 7 Epsilon 0.2988
Episode: 1219
Episode: 1220
Episode: 1221
Episode: 1222
SOLVED! Episode 1222 Steps: 36 Epsilon 0.2988
Episode: 1223
Episode: 1224
Episode: 1225
Episode: 1226
Episode: 1227
Episode: 1228
Episode: 1229
Episode: 1230
SOLVED! Episode 1230 Steps: 2 Epsilon 0.

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 24
defaultdict(<class 'int'>, {3: 14, 1: 7, 2: 3})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 32
defaultdict(<class 'int'>, {3: 9, 15: 1, 11: 1, 2: 5, 1: 6, 5: 3, 4: 1, 7: 1, 12: 1, 9: 1, 6: 2, 16: 1})
******************************
Episode: 1300
Episode: 1301
Episode: 1302
Episode: 1303
Episode: 1304
Episode: 1305
Episode: 1306
SOLVED! Episode 1306 Steps: 26 Epsilon 0.2988
Episode: 1307
Episode: 1308
Episode: 1309
Episode: 1310
SOLVED! Episode 1310 Steps: 79 Epsilon 0.2988
Episode: 1311
Episode: 1312
Episode: 1313
SOLVED! Episode 1313 Steps: 1 Epsilon 0.2988
Episode: 1314
Episode: 1315
Episode: 1316
Episode: 1317
Episode: 1318
Episode: 1319
SOLVED! Episode 1319 Steps: 2 Epsilon 0.2988
Episode: 1320
Episode: 1321
Episode: 1322
Episode: 1323
Episode: 1324
Episode: 1325
Episode: 1326
Episode: 1327
SOLVED! Episode 1327 Steps: 3 Epsilon 0.2988
Episode: 1328
Episode: 1329
Episode: 1330
SOLVED! Episode 1330 Steps: 3 Epsilon 0.2988
Episode: 1331
Episode: 1332
SOLVED! Episode 1332 Steps: 1 Epsilon 0.2988


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 37
defaultdict(<class 'int'>, {3: 18, 1: 12, 2: 7})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 40
defaultdict(<class 'int'>, {3: 16, 1: 9, 2: 5, 4: 4, 13: 1, 5: 2, 14: 1, 17: 1, 6: 1})
******************************
Episode: 1400
SOLVED! Episode 1400 Steps: 1 Epsilon 0.2988
Episode: 1401
SOLVED! Episode 1401 Steps: 1 Epsilon 0.2988
Episode: 1402
SOLVED! Episode 1402 Steps: 3 Epsilon 0.2988
Episode: 1403
SOLVED! Episode 1403 Steps: 2 Epsilon 0.2988
Episode: 1404
Episode: 1405
SOLVED! Episode 1405 Steps: 3 Epsilon 0.2988
Episode: 1406
Episode: 1407
Episode: 1408
Episode: 1409
Episode: 1410
SOLVED! Episode 1410 Steps: 47 Epsilon 0.2988
Episode: 1411
Episode: 1412
SOLVED! Episode 1412 Steps: 1 Epsilon 0.2988
Episode: 1413
Episode: 1414
SOLVED! Episode 1414 Steps: 2 Epsilon 0.2988
Episode: 1415
SOLVED! Episode 1415 Steps: 2 Epsilon 0.2988
Episode: 1416
Episode: 1417
Episode: 1418
Episode: 1419
Episode: 1420
SOLVED! Episode 1420 Steps: 54 Epsilon 0.2988
Episode: 1421
Episode: 1422
SOLVED! Episode 1422 St

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 34
defaultdict(<class 'int'>, {3: 16, 2: 8, 1: 10})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 40
defaultdict(<class 'int'>, {3: 13, 14: 2, 1: 9, 11: 1, 2: 10, 7: 1, 9: 1, 4: 2, 20: 1})
******************************
Episode: 1500
SOLVED! Episode 1500 Steps: 3 Epsilon 0.2988
Episode: 1501
Episode: 1502
Episode: 1503
Episode: 1504
Episode: 1505
SOLVED! Episode 1505 Steps: 54 Epsilon 0.2988
Episode: 1506
Episode: 1507
Episode: 1508
SOLVED! Episode 1508 Steps: 3 Epsilon 0.2988
Episode: 1509
Episode: 1510
Episode: 1511
Episode: 1512
Episode: 1513
SOLVED! Episode 1513 Steps: 2 Epsilon 0.2988
Episode: 1514
SOLVED! Episode 1514 Steps: 7 Epsilon 0.2988
Episode: 1515
Episode: 1516
Episode: 1517
SOLVED! Episode 1517 Steps: 3 Epsilon 0.2988
Episode: 1518
SOLVED! Episode 1518 Steps: 33 Epsilon 0.2988
Episode: 1519
SOLVED! Episode 1519 Steps: 2 Epsilon 0.2988
Episode: 1520
SOLVED! Episode 1520 Steps: 42 Epsilon 0.2988
Episode: 1521
Episode: 1522
Episode: 1523
Episode: 1524
SOLVED! Episode 1524 Steps: 56 Epsilon

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 27
defaultdict(<class 'int'>, {3: 12, 2: 6, 1: 9})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 34
defaultdict(<class 'int'>, {3: 13, 7: 3, 2: 7, 1: 6, 9: 1, 12: 1, 20: 2, 4: 1})
******************************
Episode: 1600
SOLVED! Episode 1600 Steps: 2 Epsilon 0.2988
Episode: 1601
SOLVED! Episode 1601 Steps: 18 Epsilon 0.2988
Episode: 1602
SOLVED! Episode 1602 Steps: 74 Epsilon 0.2988
Episode: 1603
SOLVED! Episode 1603 Steps: 3 Epsilon 0.2988
Episode: 1604
Episode: 1605
SOLVED! Episode 1605 Steps: 29 Epsilon 0.2988
Episode: 1606
Episode: 1607
Episode: 1608
Episode: 1609
Episode: 1610
Episode: 1611
Episode: 1612
SOLVED! Episode 1612 Steps: 13 Epsilon 0.2988
Episode: 1613
Episode: 1614
Episode: 1615
SOLVED! Episode 1615 Steps: 2 Epsilon 0.2988
Episode: 1616
Episode: 1617
Episode: 1618
SOLVED! Episode 1618 Steps: 3 Epsilon 0.2988
Episode: 1619
SOLVED! Episode 1619 Steps: 6 Epsilon 0.2988
Episode: 1620
Episode: 1621
Episode: 1622
SOLVED! Episode 1622 Steps: 1 Epsilon 0.2988
Episode: 1623
SOLVED! Episod

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 33
defaultdict(<class 'int'>, {3: 16, 2: 7, 1: 10})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Stochastic
******************************
Solved: 37
defaultdict(<class 'int'>, {3: 16, 4: 2, 7: 1, 2: 6, 1: 9, 5: 2, 16: 1})
******************************
Episode: 1700
Episode: 1701
Episode: 1702
Episode: 1703
Episode: 1704
SOLVED! Episode 1704 Steps: 1 Epsilon 0.2988
Episode: 1705
Episode: 1706
SOLVED! Episode 1706 Steps: 36 Epsilon 0.2988
Episode: 1707
SOLVED! Episode 1707 Steps: 63 Epsilon 0.2988
Episode: 1708
Episode: 1709
SOLVED! Episode 1709 Steps: 3 Epsilon 0.2988
Episode: 1710
Episode: 1711
Episode: 1712
Episode: 1713
Episode: 1714
SOLVED! Episode 1714 Steps: 33 Epsilon 0.2988
Episode: 1715
Episode: 1716
SOLVED! Episode 1716 Steps: 92 Epsilon 0.2988
Episode: 1717
Episode: 1718
SOLVED! Episode 1718 Steps: 8 Epsilon 0.2988
Episode: 1719
Episode: 1720
SOLVED! Episode 1720 Steps: 2 Epsilon 0.2988
Episode: 1721
Episode: 1722
SOLVED! Episode 1722 Steps: 3 Epsilon 0.2988
Episode: 1723
SOLVED! Episode 1723 Steps: 1 Epsilon 0.2988
Episode: 1724
Episode

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 26
defaultdict(<class 'int'>, {3: 11, 1: 9, 2: 6})
******************************


HBox(children=(FloatProgress(value=0.0), HTML(value='')))