# Final Project - Reinforcement Learning
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
# System packages for headless rendering and video encoding, then the Python
# packages used by the notebook. Each package is installed exactly once.
# NOTE(review): `piglet` is a templating package; `pyglet` (the windowing
# library) was probably intended - confirm before changing.
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random

import numpy as np
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

import tensorflow
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from tqdm.notebook import tqdm
from collections import defaultdict

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

In [6]:
import warnings
warnings.filterwarnings('ignore')

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [7]:
def embed_mp4(filename):
    """Embed an mp4 file in the notebook as an inline HTML5 video.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        An ``HTML`` display object wrapping a ``<video>`` tag whose source is
        the file content, base64-encoded into a ``data:`` URI.
    """
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [8]:
def get_distances_for_target(room_state, target):
    """Compute breadth-first walking distances from ``target`` to every cell.

    Args:
        room_state: 2-D array describing the room. Cells equal to 0 are
            treated as impassable; every other value is walkable.
        target: ``(row, col)`` tuple of the cell the search starts from.

    Returns:
        Float array shaped like ``room_state``. Each walkable cell reachable
        from ``target`` holds the number of 4-connected steps needed to reach
        it. The target itself and every cell that is never reached keep 0.
    """
    n_rows, n_cols = room_state.shape
    distances = np.zeros(shape=room_state.shape)
    visited_cells = {target}
    cell_queue = deque([target])

    while cell_queue:
        row, col = cell_queue.popleft()
        distance = distances[row][col]
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            next_row, next_col = row + d_row, col + d_col
            # Stay inside the grid: without this check a negative index would
            # silently wrap to the opposite edge and an index past the end
            # would raise IndexError when the room has no surrounding wall.
            if not (0 <= next_row < n_rows and 0 <= next_col < n_cols):
                continue
            if room_state[next_row][next_col] == 0:
                continue
            if (next_row, next_col) in visited_cells:
                continue
            distances[next_row][next_col] = distance + 1
            visited_cells.add((next_row, next_col))
            cell_queue.append((next_row, next_col))

    return distances

def get_maze_info(room_state):
    """Collect the two target cells of a room and their distance maps.

    Cells whose value is 2 are taken as targets, scanned in row-major order.
    Only the first two are used; fewer than two raises ``IndexError``.

    Args:
        room_state: 2-D array describing the room.

    Returns:
        Dict with keys ``'target0'`` and ``'target1'`` (``(row, col)``
        tuples), ``'distances0'`` and ``'distances1'`` (the result of
        ``get_distances_for_target`` for each target) and
        ``'common_distances'`` (their element-wise minimum).
    """
    # np.argwhere walks the array in row-major order, i.e. the same order as
    # a nested row/column loop.
    target_cells = [(int(row), int(col)) for row, col in np.argwhere(room_state == 2)]
    first_target, second_target = target_cells[0], target_cells[1]

    first_map = get_distances_for_target(room_state, first_target)
    second_map = get_distances_for_target(room_state, second_target)

    return {
        'target0': first_target,
        'target1': second_target,
        'distances0': first_map,
        'distances1': second_map,
        'common_distances': np.minimum(first_map, second_map),
    }

def calc_distances(room_state, distances):
    """Sum the distance-map values under every box that is not on a target.

    Cells of ``room_state`` equal to 4 are taken as the boxes to measure.

    Args:
        room_state: 2-D array describing the room.
        distances: Array of the same shape giving a distance for each cell.

    Returns:
        The sum of ``distances`` over all cells where ``room_state == 4``.
        With no such cell the result is 0 instead of an ``IndexError``, and
        with more than two such cells all of them are counted rather than
        only the first.
    """
    room = np.asarray(room_state)
    distance_map = np.asarray(distances)
    # Boolean-mask indexing visits cells in row-major order, matching a
    # nested row/column scan.
    return distance_map[room == 4].sum()

def box2target_change_reward(room_state, next_room_state, maze_info):
    """Shaping reward based on how a step changed the box-to-target distance.

    Args:
        room_state: Room array before the step.
        next_room_state: Room array after the step.
        maze_info: Dict as produced by ``get_maze_info``.

    Returns:
        -5.0 if the step left the room unchanged; otherwise +5.0 when the
        total box distance (see ``calc_distances``) decreased, -5.0 when it
        increased and 0.0 when it stayed the same.
    """
    # A step that changes nothing is penalised outright.
    if np.array_equal(room_state, next_room_state):
        return -5.0

    first_target = maze_info['target0']
    second_target = maze_info['target1']
    to_first = maze_info['distances0']
    to_second = maze_info['distances1']
    to_nearest = maze_info['common_distances']

    # When one target cell already holds the value 3 in the pre-step room,
    # measure against the other target only; otherwise use the per-cell
    # minimum over both targets.
    if room_state[first_target[0]][first_target[1]] == 3:
        active_map = to_second
    elif room_state[second_target[0]][second_target[1]] == 3:
        active_map = to_first
    else:
        active_map = to_nearest

    before = calc_distances(room_state, active_map)
    after = calc_distances(next_room_state, active_map)

    if after < before:
        return 5.0
    if after > before:
        return -5.0
    return 0.0

# Solution

In [19]:
class SOK_Agent:
    """DQN agent with a soft-updated target network and two replay buffers.

    The online network selects the next action and the target network
    evaluates it (Double-DQN style targets, see ``replay``). Every sampled
    transition is additionally trained in its four 90-degree rotations.
    """

    def __init__(self):
        """Build the online/target networks and set all hyperparameters."""
        # Construct DQN models
        self.state_size = (112,112,1)
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8

        # Replay buffers
        self.replay_buffer = deque(maxlen=5000)
        # Holds transitions copied in by copy_to_prioritized_buffer().
        self.prioritized_replay_buffer = deque(maxlen=1000)

        # Hyperparameters
        self.gamma = 0.9
        # epsilon starts at epsilon_min, so the decay in replay() never fires
        # with these values.
        self.epsilon = 0.2
        self.epsilon_min = 0.2
        self.epsilon_decay = 0.9995
        self.replay_rate = 20
        # Weight kept from the old target network on each soft update.
        self.update_beta = 0.9995

        # Action index after the state has been rotated once with np.rot90.
        # Indices 0-3 and 4-7 are permuted with the same pattern.
        # NOTE(review): presumably up/down/left/right for push and pull -
        # confirm against the environment's action table.
        self.action_rotation_map = {
            0: 2,
            1: 3,
            2: 1,
            3: 0,
            4: 6,
            5: 7,
            6: 5,
            7: 4
        }

    def _build_model(self):
        """Create and compile the convolutional Q-network.

        Returns:
            A compiled Keras ``Sequential`` model with ``action_size`` linear
            outputs, MSE loss and Adam with an exponentially decaying
            learning rate.
        """
        model = Sequential()
        model.add(Conv2D(32, (16,16), strides=(16,16), input_shape=self.state_size, activation='relu'))
        model.add(Conv2D(64, (3,3), activation='relu'))
        model.add(Conv2D(64, (3,3), activation='relu'))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))

        lr_schedule = ExponentialDecay(1e-5, decay_steps=10000, decay_rate=0.99, staircase=False)
        model.compile(optimizer=Adam(learning_rate=lr_schedule), loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the main replay buffer."""
        self.replay_buffer.append([state, action, reward, next_state, done])

    def copy_to_prioritized_buffer(self, n):
        """Copy the ``n`` most recent transitions into the prioritized buffer.

        Transitions are copied newest first.
        """
        for i in range(n):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])

    def act(self, state, stochastic=False):
        """Choose an action for ``state``.

        Args:
            state: Network input for a single state (batch of one).
            stochastic: If true, sample from a softmax over the predicted
                Q-values instead of taking the argmax.

        Returns:
            An action index in ``[0, action_size)``. With probability
            ``epsilon`` the index is drawn uniformly at random.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state, verbose=0)[0]

        if stochastic:
            # Numerically stable softmax: shifting by the maximum leaves the
            # probabilities unchanged but keeps exp() from overflowing to inf
            # (which would yield NaN probabilities and make
            # np.random.choice raise). float64 keeps the sum close to 1.
            logits = np.asarray(act_values, dtype=np.float64)
            exp_values = np.exp(logits - logits.max())
            act_probs = exp_values / exp_values.sum()
            return np.random.choice(np.arange(self.action_size), size=1, p=act_probs)[0]

        return np.argmax(act_values)

    def replay(self):
        """Run one training round on a sampled, rotation-augmented minibatch.

        Does nothing until the main buffer holds at least ``batch_size``
        transitions. Once the prioritized buffer holds at least half a batch,
        half of the minibatch is drawn from each buffer. After fitting, the
        target network is soft-updated and epsilon is decayed while it is
        above ``epsilon_min``.
        """
        if len(self.replay_buffer) < self.batch_size:
            return

        if len(self.prioritized_replay_buffer) < self.batch_size//2:
            minibatch = random.sample(self.replay_buffer, self.batch_size)
        else:
            minibatch = random.sample(self.replay_buffer, self.batch_size//2)
            minibatch.extend(random.sample(self.prioritized_replay_buffer, self.batch_size//2))

        # Every transition contributes four rows: itself and three rotations.
        n_rows = self.batch_size*4
        states = np.zeros((n_rows, self.state_size[0], self.state_size[1]))
        actions = np.zeros(n_rows, dtype=int)
        rewards = np.zeros(n_rows)
        next_states = np.zeros((n_rows, self.state_size[0], self.state_size[1]))
        statuses = np.zeros(n_rows)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            for rot in range(4):
                ind = i*4+rot
                if rot != 0:
                    # Rotations accumulate: each pass turns the previous
                    # result by another quarter and remaps the action.
                    state = np.rot90(state, axes=(1,2))
                    next_state = np.rot90(next_state, axes=(1,2))
                    action = self.action_rotation_map.get(action)

                states[ind] = state.copy()
                actions[ind] = action
                rewards[ind] = reward
                next_states[ind] = next_state.copy()
                statuses[ind] = 1 if done else 0

        # Start from the current predictions so only the taken action's
        # output contributes to the loss.
        targets = self.model.predict(states, verbose=0)
        # Online network picks the next action, target network scores it.
        max_actions = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_rewards = self.target_model.predict(next_states, verbose=0)

        ind = 0
        for action, reward, next_reward, max_action, done in zip(actions, rewards, next_rewards, max_actions, statuses):
            if not done:
                reward += self.gamma * next_reward[max_action]
            targets[ind][action] = reward
            ind += 1

        self.model.fit(states, targets, epochs=10, verbose=0)

        self.update_target_model()

        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay

    def update_target_model(self):
        """Soft-update the target network towards the online network.

        Each target weight becomes
        ``update_beta * target + (1 - update_beta) * online``.
        """
        blended_weights = [
            self.update_beta*target_w + (1-self.update_beta)*online_w
            for online_w, target_w in zip(self.model.get_weights(), self.target_model.get_weights())
        ]
        self.target_model.set_weights(blended_weights)

    def load(self, name):
        """Load weights from ``name`` into both the online and target network."""
        self.model.load_weights(name)
        self.target_model.load_weights(name)

    def save(self, name):
        """Save the online network's weights to ``name``."""
        self.model.save_weights(name)

In [20]:
def process_frame(frame):
    """Turn a frame into a scaled single-channel array with a leading axis.

    Args:
        frame: Array with at least three axes; axis 2 is averaged away
            (presumably the colour channels - confirm against the env).

    Returns:
        The axis-2 mean divided by 255, with a new axis of length 1 inserted
        in front.
    """
    grayscale = frame.mean(axis=2) / 255
    return grayscale[np.newaxis, ...]

## Training

#### Test Suite

In [21]:
def init_sok(r, max_steps=40):
    """Create a 7x7, two-box push-and-pull Sokoban environment.

    Python's ``random`` module is seeded with ``r`` before the environment is
    constructed (presumably so the generated room is reproducible for a given
    ``r``; only the stdlib generator is seeded here).

    Args:
        r: Seed passed to ``random.seed``.
        max_steps: Step limit handed to the environment's ``set_maxsteps``.

    Returns:
        The configured ``PushAndPullSokobanEnv`` instance.
    """
    random.seed(r)
    env = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=2)
    env.set_maxsteps(max_steps)
    return env

In [22]:
def test_agent(e, cur_record, stochastic=False):
    """Evaluate the global ``agent`` on 100 fixed puzzles (seeds 0-99).

    Exploration is switched off (epsilon = 0) for the evaluation and restored
    afterwards, even if an episode raises. Each puzzle runs with a 30-step
    limit. When the number of solved puzzles beats ``cur_record`` the online
    network's weights are saved.

    Args:
        e: Zero-based training episode index, used only in the printed line.
        cur_record: Best number of solved puzzles seen so far.
        stochastic: Forwarded to ``agent.act``.

    Returns:
        Tuple ``(num_solved, cur_record)`` with the possibly updated record.
    """
    current_epsilon = agent.epsilon
    agent.epsilon = 0.0
    num_solved = 0

    try:
        for t in tqdm(range(100)):
            sok = init_sok(t, 30)

            state = sok.get_image('rgb_array')
            done = False
            while not done:
                action = agent.act(process_frame(state), stochastic)
                # Agent actions 0-3 map to env actions 1-4, 4-7 to 9-12.
                if action < 4:
                    action += 1
                else:
                    action += 5
                state, _, done, _ = sok.step(action)

            if sok.boxes_on_target == 2:
                num_solved += 1
    finally:
        # Never leave the agent stuck in greedy mode after a failed run.
        agent.epsilon = current_epsilon

    # NOTE(review): _decayed_lr is a private optimizer method and may not
    # exist in every Keras version.
    print("Episode %d Epsilon %.3f Learning Rate %.6f Solved: %d" % (
        e+1,
        agent.epsilon,
        agent.model.optimizer._decayed_lr(tensorflow.float32).numpy(),
        num_solved))

    if num_solved > cur_record:
        # The backslash is escaped explicitly: "\Q" is an invalid escape
        # sequence. The resulting path string is unchanged.
        agent.save("models\\Q3_06A_%d.h5" % (num_solved))
        cur_record = num_solved

    return num_solved, cur_record

In [None]:
agent = SOK_Agent()

# Resume from a previously saved checkpoint. The backslash is escaped
# explicitly ("\Q" is an invalid escape sequence); the path string itself is
# unchanged.
agent.load("models\\Q3_06A_69.h5")

running_puzzles = 0
running_solved = 0
solved_in_train = []
solved_in_test = []
test_record = 69

for e in range(50000):
    # Training puzzles use seeds 100 and up; test_agent() uses seeds 0-99.
    sok = init_sok(e+100)
    random.seed(e)
    running_puzzles += 1

    state = process_frame(sok.get_image('rgb_array'))
    room_state = sok.room_state.copy()
    maze_info = get_maze_info(room_state)

    for step in range(sok.max_steps):
        action = agent.act(state, stochastic=True)
        # Agent actions 0-3 map to env actions 1-4, 4-7 to env actions 9-12.
        env_action = action + 1 if action < 4 else action + 5
        next_state, reward, done, _ = sok.step(env_action)

        next_state = process_frame(next_state)
        next_room_state = sok.room_state

        # The shaping term is only added on non-terminal steps.
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, maze_info)

        agent.remember(state, action, reward, next_state, done)

        state = next_state.copy()
        room_state = next_room_state.copy()

        # No training during the first 200 episodes; afterwards one replay
        # round every `replay_rate` steps within an episode.
        if e >= 200 and (step+1) % agent.replay_rate == 0:
            agent.replay()

        if done:
            if sok.boxes_on_target == 2:
                # Keep the whole solved episode for prioritized sampling.
                agent.copy_to_prioritized_buffer(step+1)
                running_solved += 1

            if (e+1) % 10 == 0:
                print(f"{running_solved} | {running_puzzles}")

                if (e+1) % 100 == 0:
                    solved_in_train.append(running_solved)
                    running_puzzles = 0
                    running_solved = 0

            break

    # Evaluate on the fixed test suite every 100 episodes.
    if (e+1) % 100 == 0:
        solved_tests, test_record = test_agent(e, test_record, stochastic=False)
        solved_in_test.append(solved_tests)

8 | 10
17 | 20
22 | 30
27 | 40
33 | 50
42 | 60
49 | 70
55 | 80
64 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 100 Epsilon 0.200 Learning Rate 0.000010 Solved: 69
7 | 10
13 | 20
17 | 30
25 | 40
34 | 50
43 | 60
49 | 70
56 | 80
62 | 90
[SOKOBAN] Retry . . .
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 200 Epsilon 0.200 Learning Rate 0.000010 Solved: 69
7 | 10
13 | 20
22 | 30
30 | 40
36 | 50
[SOKOBAN] Retry . . .
43 | 60
[SOKOBAN] Retry . . .
53 | 70
59 | 80
65 | 90
72 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 300 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
13 | 20
19 | 30
27 | 40
35 | 50
40 | 60
45 | 70
52 | 80
57 | 90
63 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 400 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
6 | 10
15 | 20
22 | 30
28 | 40
34 | 50
[SOKOBAN] Retry . . .
42 | 60
[SOKOBAN] Retry . . .
50 | 70
57 | 80
63 | 90
72 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 500 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
8 | 10
[SOKOBAN] Retry . . .
15 | 20
23 | 30
30 | 40
[SOKOBAN] Retry . . .
35 | 50
40 | 60
47 | 70
[SOKOBAN] Retry . . .
55 | 80
61 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 600 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
7 | 10
16 | 20
24 | 30
31 | 40
37 | 50
[SOKOBAN] Retry . . .
41 | 60
44 | 70
53 | 80
59 | 90
65 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 700 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
6 | 10
[SOKOBAN] Retry . . .
12 | 20
17 | 30
24 | 40
30 | 50
38 | 60
46 | 70
51 | 80
[SOKOBAN] Retry . . .
58 | 90
62 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 800 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
8 | 10
12 | 20
[SOKOBAN] Retry . . .
21 | 30
26 | 40
34 | 50
42 | 60
46 | 70
53 | 80
58 | 90
64 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 900 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
[SOKOBAN] Retry . . .
15 | 20
21 | 30
27 | 40
33 | 50
41 | 60
49 | 70
56 | 80
62 | 90
70 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1000 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
7 | 10
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
16 | 20
23 | 30
30 | 40
37 | 50
42 | 60
48 | 70
55 | 80
59 | 90
64 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1100 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
8 | 10
15 | 20
20 | 30
25 | 40
34 | 50
41 | 60
49 | 70
56 | 80
61 | 90
68 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1200 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
5 | 10
12 | 20
18 | 30
24 | 40
[SOKOBAN] Retry . . .
30 | 50
36 | 60
44 | 70
51 | 80
60 | 90
[SOKOBAN] Retry . . .
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1300 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
6 | 10
10 | 20
[SOKOBAN] Retry . . .
17 | 30
[SOKOBAN] Retry . . .
22 | 40
29 | 50
33 | 60
43 | 70
48 | 80
55 | 90
62 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1400 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
8 | 10
12 | 20
19 | 30
[SOKOBAN] Retry . . .
26 | 40
32 | 50
37 | 60
41 | 70
48 | 80
[SOKOBAN] Retry . . .
52 | 90
59 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1500 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
8 | 10
13 | 20
18 | 30
[SOKOBAN] Retry . . .
23 | 40
29 | 50
[SOKOBAN] Retry . . .
38 | 60
45 | 70
53 | 80
59 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1600 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
8 | 10
14 | 20
19 | 30
26 | 40
[SOKOBAN] Retry . . .
34 | 50
39 | 60
43 | 70
47 | 80
51 | 90
[SOKOBAN] Retry . . .
59 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1700 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
[SOKOBAN] Retry . . .
7 | 10
11 | 20
18 | 30
26 | 40
33 | 50
[SOKOBAN] Retry . . .
39 | 60
46 | 70
[SOKOBAN] Retry . . .
52 | 80
[SOKOBAN] Retry . . .
58 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1800 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
7 | 10
[SOKOBAN] Retry . . .
14 | 20
[SOKOBAN] Retry . . .
21 | 30
[SOKOBAN] Retry . . .
30 | 40
[SOKOBAN] Retry . . .
37 | 50
44 | 60
51 | 70
[SOKOBAN] Retry . . .
59 | 80
64 | 90
73 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 1900 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
6 | 10
13 | 20
[SOKOBAN] Retry . . .
18 | 30
[SOKOBAN] Retry . . .
26 | 40
33 | 50
39 | 60
46 | 70
[SOKOBAN] Retry . . .
53 | 80
[SOKOBAN] Retry . . .
59 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2000 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
4 | 10
[SOKOBAN] Retry . . .
12 | 20
[SOKOBAN] Retry . . .
20 | 30
26 | 40
34 | 50
41 | 60
50 | 70
56 | 80
60 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2100 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
4 | 10
10 | 20
[SOKOBAN] Retry . . .
14 | 30
21 | 40
28 | 50
35 | 60
37 | 70
46 | 80
53 | 90
57 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2200 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
6 | 10
14 | 20
21 | 30
[SOKOBAN] Retry . . .
27 | 40
35 | 50
43 | 60
51 | 70
59 | 80
66 | 90
72 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2300 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
12 | 20
19 | 30
25 | 40
32 | 50
[SOKOBAN] Retry . . .
38 | 60
[SOKOBAN] Retry . . .
44 | 70
49 | 80
58 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2400 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
8 | 10
14 | 20
21 | 30
27 | 40
34 | 50
41 | 60
47 | 70
54 | 80
62 | 90
[SOKOBAN] Retry . . .
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2500 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
8 | 10
14 | 20
[SOKOBAN] Retry . . .
21 | 30
26 | 40
34 | 50
40 | 60
45 | 70
51 | 80
59 | 90
63 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2600 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
6 | 10
12 | 20
19 | 30
25 | 40
32 | 50
40 | 60
47 | 70
54 | 80
60 | 90
68 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2700 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
6 | 10
13 | 20
21 | 30
29 | 40
35 | 50
[SOKOBAN] Retry . . .
41 | 60
49 | 70
57 | 80
61 | 90
[SOKOBAN] Retry . . .
69 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2800 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
8 | 10
14 | 20
19 | 30
[SOKOBAN] Retry . . .
26 | 40
31 | 50
36 | 60
45 | 70
52 | 80
60 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 2900 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
9 | 10
19 | 20
26 | 30
32 | 40
39 | 50
45 | 60
52 | 70
61 | 80
66 | 90
72 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3000 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
8 | 10
12 | 20
19 | 30
[SOKOBAN] Retry . . .
27 | 40
32 | 50
39 | 60
48 | 70
54 | 80
[SOKOBAN] Retry . . .
61 | 90
69 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3100 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
6 | 10
12 | 20
19 | 30
25 | 40
31 | 50
37 | 60
44 | 70
51 | 80
59 | 90
[SOKOBAN] Retry . . .
64 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3200 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
8 | 10
13 | 20
19 | 30
26 | 40
[SOKOBAN] Retry . . .
33 | 50
40 | 60
45 | 70
53 | 80
60 | 90
[SOKOBAN] Retry . . .
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3300 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
8 | 10
14 | 20
21 | 30
27 | 40
32 | 50
39 | 60
46 | 70
51 | 80
56 | 90
61 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3400 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
6 | 10
15 | 20
20 | 30
29 | 40
34 | 50
[SOKOBAN] Retry . . .
39 | 60
47 | 70
56 | 80
63 | 90
68 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3500 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
13 | 20
20 | 30
28 | 40
35 | 50
43 | 60
50 | 70
57 | 80
67 | 90
[SOKOBAN] Retry . . .
74 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3600 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
5 | 10
14 | 20
19 | 30
27 | 40
35 | 50
43 | 60
48 | 70
55 | 80
60 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3700 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
15 | 20
20 | 30
28 | 40
33 | 50
[SOKOBAN] Retry . . .
39 | 60
45 | 70
51 | 80
60 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3800 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
[SOKOBAN] Retry . . .
4 | 10
10 | 20
16 | 30
24 | 40
30 | 50
37 | 60
43 | 70
50 | 80
56 | 90
64 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 3900 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
6 | 10
9 | 20
15 | 30
22 | 40
30 | 50
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
38 | 60
46 | 70
[SOKOBAN] Retry . . .
52 | 80
58 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4000 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
5 | 10
[SOKOBAN] Retry . . .
14 | 20
22 | 30
26 | 40
33 | 50
37 | 60
42 | 70
48 | 80
57 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4100 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
12 | 20
[SOKOBAN] Retry . . .
19 | 30
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
25 | 40
29 | 50
35 | 60
42 | 70
48 | 80
54 | 90
58 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4200 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
6 | 10
14 | 20
20 | 30
23 | 40
29 | 50
34 | 60
43 | 70
51 | 80
56 | 90
64 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4300 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
7 | 10
15 | 20
21 | 30
29 | 40
37 | 50
44 | 60
51 | 70
57 | 80
62 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4400 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
6 | 10
11 | 20
14 | 30
20 | 40
28 | 50
35 | 60
42 | 70
49 | 80
[SOKOBAN] Retry . . .
56 | 90
[SOKOBAN] Retry . . .
63 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4500 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
6 | 10
12 | 20
18 | 30
[SOKOBAN] Retry . . .
26 | 40
34 | 50
38 | 60
43 | 70
[SOKOBAN] Retry . . .
49 | 80
55 | 90
61 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4600 Epsilon 0.200 Learning Rate 0.000010 Solved: 63
4 | 10
13 | 20
21 | 30
27 | 40
36 | 50
44 | 60
52 | 70
57 | 80
64 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4700 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
14 | 20
20 | 30
29 | 40
[SOKOBAN] Retry . . .
35 | 50
44 | 60
50 | 70
56 | 80
64 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4800 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
5 | 10
11 | 20
20 | 30
28 | 40
[SOKOBAN] Retry . . .
35 | 50
42 | 60
48 | 70
54 | 80
60 | 90
65 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 4900 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
6 | 10
12 | 20
18 | 30
27 | 40
34 | 50
40 | 60
47 | 70
53 | 80
59 | 90
65 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5000 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
8 | 10
14 | 20
22 | 30
28 | 40
35 | 50
43 | 60
50 | 70
57 | 80
63 | 90
70 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5100 Epsilon 0.200 Learning Rate 0.000010 Solved: 64
6 | 10
15 | 20
21 | 30
26 | 40
33 | 50
38 | 60
46 | 70
54 | 80
61 | 90
70 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5200 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
15 | 20
[SOKOBAN] Retry . . .
21 | 30
[SOKOBAN] Retry . . .
27 | 40
[SOKOBAN] Retry . . .
31 | 50
37 | 60
42 | 70
50 | 80
57 | 90
64 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5300 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
7 | 10
15 | 20
21 | 30
25 | 40
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
32 | 50
41 | 60
50 | 70
58 | 80
63 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5400 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
5 | 10
13 | 20
21 | 30
27 | 40
33 | 50
39 | 60
[SOKOBAN] Retry . . .
45 | 70
53 | 80
59 | 90
64 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5500 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
7 | 10
13 | 20
20 | 30
29 | 40
37 | 50
46 | 60
53 | 70
60 | 80
67 | 90
74 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5600 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
2 | 10
6 | 20
[SOKOBAN] Retry . . .
12 | 30
18 | 40
22 | 50
30 | 60
36 | 70
44 | 80
50 | 90
58 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5700 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
6 | 10
15 | 20
[SOKOBAN] Retry . . .
22 | 30
[SOKOBAN] Retry . . .
28 | 40
34 | 50
38 | 60
44 | 70
[SOKOBAN] Retry . . .
50 | 80
55 | 90
63 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5800 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
8 | 10
16 | 20
23 | 30
32 | 40
38 | 50
44 | 60
52 | 70
60 | 80
69 | 90
74 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 5900 Epsilon 0.200 Learning Rate 0.000010 Solved: 65
7 | 10
12 | 20
18 | 30
21 | 40
25 | 50
32 | 60
38 | 70
[SOKOBAN] Retry . . .
45 | 80
[SOKOBAN] Retry . . .
50 | 90
58 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6000 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
7 | 10
15 | 20
22 | 30
29 | 40
37 | 50
44 | 60
52 | 70
[SOKOBAN] Retry . . .
60 | 80
[SOKOBAN] Retry . . .
64 | 90
72 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6100 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
6 | 10
14 | 20
20 | 30
[SOKOBAN] Retry . . .
27 | 40
33 | 50
40 | 60
47 | 70
55 | 80
63 | 90
70 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6200 Epsilon 0.200 Learning Rate 0.000010 Solved: 66
5 | 10
11 | 20
17 | 30
26 | 40
[SOKOBAN] Retry . . .
34 | 50
42 | 60
49 | 70
55 | 80
65 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6300 Epsilon 0.200 Learning Rate 0.000010 Solved: 67
7 | 10
13 | 20
22 | 30
26 | 40
[SOKOBAN] Retry . . .
32 | 50
37 | 60
42 | 70
51 | 80
58 | 90
63 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6400 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
7 | 10
[SOKOBAN] Retry . . .
16 | 20
22 | 30
29 | 40
37 | 50
44 | 60
49 | 70
55 | 80
57 | 90
63 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6500 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
8 | 10
17 | 20
22 | 30
30 | 40
39 | 50
[SOKOBAN] Retry . . .
46 | 60
[SOKOBAN] Retry . . .
53 | 70
[SOKOBAN] Retry . . .
60 | 80
66 | 90
73 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6600 Epsilon 0.200 Learning Rate 0.000009 Solved: 66
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
5 | 10
14 | 20
20 | 30
26 | 40
32 | 50
38 | 60
47 | 70
54 | 80
59 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6700 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
6 | 10
12 | 20
20 | 30
28 | 40
35 | 50
42 | 60
49 | 70
56 | 80
64 | 90
[SOKOBAN] Retry . . .
70 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6800 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
8 | 10
15 | 20
22 | 30
29 | 40
35 | 50
40 | 60
47 | 70
[SOKOBAN] Retry . . .
54 | 80
62 | 90
68 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 6900 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
7 | 10
15 | 20
[SOKOBAN] Retry . . .
22 | 30
24 | 40
32 | 50
39 | 60
43 | 70
49 | 80
55 | 90
61 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7000 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
3 | 10
[SOKOBAN] Retry . . .
11 | 20
15 | 30
21 | 40
30 | 50
37 | 60
42 | 70
48 | 80
56 | 90
61 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7100 Epsilon 0.200 Learning Rate 0.000009 Solved: 66
6 | 10
13 | 20
21 | 30
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
29 | 40
36 | 50
[SOKOBAN] Retry . . .
41 | 60
47 | 70
54 | 80
[SOKOBAN] Retry . . .
60 | 90
65 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7200 Epsilon 0.200 Learning Rate 0.000009 Solved: 66
6 | 10
14 | 20
22 | 30
30 | 40
37 | 50
43 | 60
48 | 70
54 | 80
61 | 90
68 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7300 Epsilon 0.200 Learning Rate 0.000009 Solved: 66
6 | 10
13 | 20
19 | 30
25 | 40
34 | 50
41 | 60
[SOKOBAN] Retry . . .
45 | 70
55 | 80
64 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7400 Epsilon 0.200 Learning Rate 0.000009 Solved: 66
6 | 10
14 | 20
22 | 30
30 | 40
34 | 50
[SOKOBAN] Retry . . .
42 | 60
47 | 70
53 | 80
60 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7500 Epsilon 0.200 Learning Rate 0.000009 Solved: 67
6 | 10
13 | 20
20 | 30
27 | 40
[SOKOBAN] Retry . . .
34 | 50
38 | 60
44 | 70
50 | 80
57 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7600 Epsilon 0.200 Learning Rate 0.000009 Solved: 66
3 | 10
[SOKOBAN] Retry . . .
7 | 20
15 | 30
[SOKOBAN] Retry . . .
23 | 40
32 | 50
41 | 60
50 | 70
57 | 80
65 | 90
69 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7700 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
9 | 10
16 | 20
[SOKOBAN] Retry . . .
23 | 30
28 | 40
[SOKOBAN] Retry . . .
32 | 50
41 | 60
48 | 70
[SOKOBAN] Retry . . .
53 | 80
59 | 90
65 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7800 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
7 | 10
[SOKOBAN] Retry . . .
11 | 20
16 | 30
24 | 40
30 | 50
36 | 60
44 | 70
49 | 80
55 | 90
62 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 7900 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
7 | 10
14 | 20
[SOKOBAN] Retry . . .
18 | 30
25 | 40
32 | 50
39 | 60
[SOKOBAN] Retry . . .
46 | 70
51 | 80
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
55 | 90
58 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8000 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
8 | 10
13 | 20
[SOKOBAN] Retry . . .
[SOKOBAN] Retry . . .
19 | 30
[SOKOBAN] Retry . . .
27 | 40
35 | 50
40 | 60
46 | 70
52 | 80
57 | 90
63 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8100 Epsilon 0.200 Learning Rate 0.000009 Solved: 67
7 | 10
15 | 20
[SOKOBAN] Retry . . .
23 | 30
31 | 40
37 | 50
45 | 60
54 | 70
60 | 80
65 | 90
73 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8200 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
7 | 10
15 | 20
[SOKOBAN] Retry . . .
23 | 30
30 | 40
36 | 50
43 | 60
52 | 70
58 | 80
64 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8300 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
8 | 10
15 | 20
22 | 30
27 | 40
[SOKOBAN] Retry . . .
31 | 50
[SOKOBAN] Retry . . .
37 | 60
[SOKOBAN] Retry . . .
43 | 70
50 | 80
58 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8400 Epsilon 0.200 Learning Rate 0.000009 Solved: 63
5 | 10
11 | 20
18 | 30
25 | 40
32 | 50
37 | 60
47 | 70
53 | 80
58 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8500 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
6 | 10
[SOKOBAN] Retry . . .
14 | 20
22 | 30
[SOKOBAN] Retry . . .
28 | 40
35 | 50
43 | 60
47 | 70
[SOKOBAN] Retry . . .
55 | 80
63 | 90
69 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8600 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
6 | 10
14 | 20
23 | 30
31 | 40
37 | 50
[SOKOBAN] Retry . . .
45 | 60
52 | 70
59 | 80
62 | 90
69 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8700 Epsilon 0.200 Learning Rate 0.000009 Solved: 63
7 | 10
17 | 20
24 | 30
30 | 40
37 | 50
44 | 60
50 | 70
58 | 80
65 | 90
70 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8800 Epsilon 0.200 Learning Rate 0.000009 Solved: 65
6 | 10
10 | 20
18 | 30
26 | 40
[SOKOBAN] Retry . . .
34 | 50
[SOKOBAN] Retry . . .
39 | 60
46 | 70
55 | 80
[SOKOBAN] Retry . . .
61 | 90
70 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 8900 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
7 | 10
15 | 20
18 | 30
25 | 40
34 | 50
41 | 60
50 | 70
56 | 80
63 | 90
71 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 9000 Epsilon 0.200 Learning Rate 0.000009 Solved: 63
[SOKOBAN] Retry . . .
8 | 10
16 | 20
21 | 30
29 | 40
35 | 50
42 | 60
47 | 70
53 | 80
59 | 90
66 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 9100 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
6 | 10
10 | 20
14 | 30
20 | 40
26 | 50
[SOKOBAN] Retry . . .
32 | 60
40 | 70
49 | 80
56 | 90
61 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 9200 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
6 | 10
10 | 20
[SOKOBAN] Retry . . .
17 | 30
21 | 40
29 | 50
38 | 60
43 | 70
52 | 80
[SOKOBAN] Retry . . .
59 | 90
67 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 9300 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
5 | 10
12 | 20
20 | 30
[SOKOBAN] Retry . . .
23 | 40
29 | 50
36 | 60
42 | 70
45 | 80
52 | 90
56 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 9400 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
8 | 10
16 | 20
24 | 30
[SOKOBAN] Retry . . .
31 | 40
34 | 50
42 | 60
48 | 70
53 | 80
61 | 90
69 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 9500 Epsilon 0.200 Learning Rate 0.000009 Solved: 64
7 | 10
[SOKOBAN] Retry . . .
13 | 20
21 | 30
28 | 40
35 | 50
[SOKOBAN] Retry . . .
43 | 60
[SOKOBAN] Retry . . .
52 | 70
59 | 80
[SOKOBAN] Retry . . .
63 | 90
69 | 100


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

[SOKOBAN] Retry . . .

Episode 9600 Epsilon 0.200 Learning Rate 0.000009 Solved: 67
7 | 10
12 | 20
20 | 30
28 | 40
36 | 50
[SOKOBAN] Retry . . .
45 | 60
