# Final Project - Reinforcement Learning
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time

import numpy as np
import scipy as scp
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Dense

from tqdm.notebook import tqdm
from collections import defaultdict

In [3]:
%matplotlib inline

In [4]:
# Ask imageio's ffmpeg plugin to download its ffmpeg binary.
# NOTE(review): presumably redundant with the `imageio_download_bin ffmpeg`
# call in the install cell, and presumably tied to the pinned imageio==2.4.0 —
# confirm before unpinning imageio.
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embed an mp4 file in the notebook as an inline HTML5 video.

    The whole file is read into memory and inlined as a base64 ``data:`` URI,
    so the resulting output cell grows with the size of the video.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object wrapping a ``<video>`` tag.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the previous `open(...).read()` left closing to the garbage collector).
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    """Compute breadth-first step distances from the target cell.

    The grid is scanned for cells equal to 2 (the target); if several exist,
    the last one in row-major order is used. A breadth-first search then
    spreads out from that cell through every 4-connected neighbour whose value
    is not 0 (0 is treated as impassable).

    Args:
        room_state: 2-D array-like exposing ``.shape`` and ``[i][j]`` indexing.

    Returns:
        A float ``numpy`` array of the same shape as ``room_state`` holding the
        number of steps from the target to each reachable cell. The target
        itself, impassable cells and unreachable cells all hold 0.

    Raises:
        ValueError: If ``room_state`` contains no cell equal to 2.
    """
    n_rows, n_cols = room_state.shape[0], room_state.shape[1]

    target = None
    for i in range(n_rows):
        for j in range(n_cols):
            if room_state[i][j] == 2:
                target = (i, j)

    # Fail with a clear message instead of an UnboundLocalError further down.
    if target is None:
        raise ValueError("room_state contains no target cell (value 2)")

    distances = np.zeros(shape=room_state.shape)
    visited_cells = {target}
    cell_queue = deque([target])

    while cell_queue:
        row, col = cell_queue.popleft()
        distance = distances[row][col]
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            next_row, next_col = row + d_row, col + d_col
            # Stay inside the grid: a negative index would silently wrap to
            # the opposite edge and a too-large one would raise IndexError.
            if not (0 <= next_row < n_rows and 0 <= next_col < n_cols):
                continue
            if room_state[next_row][next_col] != 0 and (next_row, next_col) not in visited_cells:
                distances[next_row][next_col] = distance + 1
                visited_cells.add((next_row, next_col))
                cell_queue.append((next_row, next_col))

    return distances

def calc_distances(room_state, distances):
    """Locate the mover and the box and look up the box's distance value.

    Cells equal to 4 are taken as the box and cells equal to 5 as the mover;
    when a value occurs more than once, the last occurrence in row-major order
    is kept.

    Args:
        room_state: 2-D grid of cell codes.
        distances: Grid indexed like ``room_state`` (e.g. the output of
            ``get_distances``).

    Returns:
        Tuple ``(mover, box, distance)`` where ``mover`` and ``box`` are
        ``(row, col)`` tuples (``mover`` is ``None`` if no 5 is present) and
        ``distance`` is ``distances[box_row][box_col]``.

    Raises:
        TypeError: If no cell equals 4, because the box position is ``None``
            when the distance lookup is attempted.
    """
    found = {4: None, 5: None}
    for position, code in np.ndenumerate(room_state):
        # Two independent checks, so later matches overwrite earlier ones.
        if code == 4:
            found[4] = position
        if code == 5:
            found[5] = position

    box = found[4]
    mover = found[5]
    return mover, box, distances[box[0]][box[1]]

def box2target_change_reward(room_state, next_room_state, distances):
    """Shaping reward comparing the grid before and after one step.

    Two independent terms of +/-10 are summed:

    * box term: +10 if the box's value in ``distances`` decreased, -10 if it
      increased, 0 otherwise;
    * mover term: +10 if the Euclidean mover-to-box distance shrank and was at
      least 2 before the step, -10 if it grew and is at least 2 after the step,
      0 otherwise.

    If the two grids are identical the function returns -10.0 immediately.

    Args:
        room_state: Grid before the step.
        next_room_state: Grid after the step.
        distances: Per-cell distance grid passed through to ``calc_distances``.

    Returns:
        The shaping reward as a float in {-20, -10, 0, 10, 20}.
    """
    # Nothing changed on the grid: flat penalty, no further analysis.
    if np.array_equal(room_state, next_room_state):
        return -10.0

    def _euclidean(a, b):
        return np.sqrt((a[0]-b[0])**2 + (a[1]-b[1])**2)

    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)

    # Box term: did the box get closer to / further from the target?
    if n_t2b < t2b:
        box_term = 10.0
    elif n_t2b > t2b:
        box_term = -10.0
    else:
        box_term = 0.0

    # Mover term: only counted while the mover is not already next to the box.
    gap_before = _euclidean(mover, box)
    gap_after = _euclidean(n_mover, n_box)
    if gap_after < gap_before and gap_before >= 2:
        mover_term = 10.0
    elif gap_after > gap_before and gap_after >= 2:
        mover_term = -10.0
    else:
        mover_term = 0.0

    return box_term + mover_term

# Solution

In [8]:
class SOK_Agent:
    """DQN agent with a soft-updated target network and two replay buffers.

    The online network picks the greedy next action and the target network
    evaluates it. Half of every training batch is drawn from a second buffer
    (``prioritized_replay_buffer``) once that buffer holds enough transitions.
    """

    def __init__(self):
        # Construct DQN models
        self.state_size = (25,)   # flat feature vector; process_frame emits 5x5 = 25 values
        self.action_size = 8      # the training loop maps 0-3 -> env actions 1-4, 4-7 -> 9-12
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8

        # Replay buffers
        self.replay_buffer = deque(maxlen=5000)
        self.prioritized_replay_buffer = deque(maxlen=500)

        # Hyperparameters
        self.gamma = 0.9            # discount factor
        self.epsilon = 1.0          # current exploration probability
        self.epsilon_min = 0.3      # decay stops once epsilon is at or below this
        self.epsilon_decay = 0.995  # multiplicative decay applied per replay() call
        self.replay_rate = 10       # read by the training loop: replay every N steps
        self.update_beta = 0.999    # weight kept from the old target net in soft updates

        self.verbosity = 100        # not read anywhere inside this class

    def _build_model(self):
        """Build and compile the Q-network (one hidden ReLU layer, linear output)."""
        model = Sequential()
        model.add(Dense(25, input_shape=self.state_size, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the main replay buffer."""
        self.replay_buffer.append([state, action, reward, next_state, done])

    def copy_to_prioritized_buffer(self, n):
        """Copy the ``n`` most recent transitions (newest first) to the second buffer.

        ``n`` is clamped to the number of stored transitions so that asking for
        more than is available no longer raises ``IndexError``.
        """
        n = min(n, len(self.replay_buffer))
        for i in range(n):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])

    def act(self, state, stochastic=False):
        """Choose an action index for ``state``.

        Args:
            state: Batch of one observation, shape ``(1, state_size[0])``.
            stochastic: If True, sample from a softmax over the Q-values
                instead of taking the argmax.

        Returns:
            Integer action index in ``[0, action_size)``.
        """
        # Strict '<' so that epsilon == 0.0 never explores (rand() can return 0.0).
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state, verbose=0)[0]

        if stochastic:
            # Shift by the max before exponentiating: softmax is unchanged, but
            # large Q-values can no longer overflow to inf and yield NaN
            # probabilities (which np.random.choice rejects).
            shifted = np.asarray(act_values, dtype=np.float64)
            shifted = shifted - shifted.max()
            act_probs = np.exp(shifted)
            act_probs = act_probs / act_probs.sum()
            return np.random.choice(np.arange(self.action_size), size=1, p=act_probs)[0]

        return np.argmax(act_values)

    def replay(self):
        """Run one training round on a sampled minibatch.

        Does nothing until the main buffer holds ``batch_size`` transitions.
        Otherwise it fits the online model on the minibatch, soft-updates the
        target model and decays epsilon.
        """
        if len(self.replay_buffer) < self.batch_size:
            return

        half = self.batch_size // 2
        if len(self.prioritized_replay_buffer) < half:
            minibatch = random.sample(self.replay_buffer, self.batch_size)
        else:
            # Draw the remainder from the main buffer so an odd batch_size
            # still yields a full batch instead of an all-zero padding row.
            minibatch = random.sample(self.replay_buffer, self.batch_size - half)
            minibatch.extend(random.sample(self.prioritized_replay_buffer, half))

        n = len(minibatch)
        states = np.zeros((n, self.state_size[0]))
        actions = np.zeros(n, dtype=int)
        rewards = np.zeros(n)
        next_states = np.zeros((n, self.state_size[0]))
        dones = np.zeros(n, dtype=bool)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            states[i] = state
            actions[i] = action
            rewards[i] = reward
            next_states[i] = next_state
            dones[i] = bool(done)

        # verbose=0 keeps Keras from printing a progress bar per call,
        # consistent with act().
        targets = self.model.predict(states, verbose=0)
        max_actions = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_rewards = self.target_model.predict(next_states, verbose=0)

        # Online net selects the next action, target net evaluates it;
        # terminal transitions keep the raw reward.
        rows = np.arange(n)
        bootstrapped = rewards + self.gamma * next_rewards[rows, max_actions]
        targets[rows, actions] = np.where(dones, rewards, bootstrapped)

        self.model.fit(states, targets, epochs=10, verbose=0)

        self.update_target_model()

        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay

    def update_target_model(self):
        """Soft-update: target <- beta * target + (1 - beta) * online."""
        beta = self.update_beta
        blended = [
            beta * target_w + (1 - beta) * online_w
            for online_w, target_w in zip(self.model.get_weights(),
                                          self.target_model.get_weights())
        ]
        self.target_model.set_weights(blended)

    def load(self, name):
        """Load online-model weights from ``name`` and sync the target model.

        Without the sync the target network would keep its random
        initialisation and, with ``update_beta`` close to 1, would take very
        long to catch up if training is resumed.
        """
        self.model.load_weights(name)
        self.target_model.set_weights(self.model.get_weights())

    def save(self, name):
        """Save the online-model weights to ``name``."""
        self.model.save_weights(name)

In [9]:
def process_frame(frame):
    """Reduce a rendered frame to a 1x25 feature row.

    Takes channel 0 of the 80x80 window ``frame[16:96, 16:96]``, splits it into
    a 5x5 grid of 16x16 tiles, keeps the maximum of each tile and scales the
    result by 1/255.

    Args:
        frame: Image array indexable as ``frame[rows, cols, channel]`` and
            covering at least rows/cols 16..95.

    Returns:
        Array of shape ``(1, 25)``: the 25 tile maxima divided by 255, in
        row-major tile order.
    """
    window = frame[16:96, 16:96, 0]
    tiles = window.reshape(5, 16, 5, 16)
    # Collapse the within-tile axes one at a time (columns, then rows).
    tile_max = tiles.max(axis=3).max(axis=1)
    features = tile_max.reshape(-1) / 255
    return features[np.newaxis, :]

## Training

#### Test Suite

In [10]:
def test_agent(stochastic=False):
    """Evaluate the global ``agent`` on 100 seeded levels and print a summary.

    Exploration is switched off (epsilon = 0) for the duration of the run and
    restored afterwards, even if the evaluation raises or is interrupted.
    Each level is created after ``random.seed(t)`` for ``t`` in 0..99 and is
    limited to 20 steps.

    Args:
        stochastic: Passed through to ``agent.act``; selects sampled instead
            of greedy actions.

    Returns:
        None. The number of solved levels and a histogram of steps-to-solve
        are printed.
    """
    current_epsilon = agent.epsilon
    agent.epsilon = 0.0
    num_solved = 0
    solved_in_steps = defaultdict(int)

    try:
        for t in tqdm(range(100)):
            random.seed(t)
            sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
            sok.set_maxsteps(20)
            steps = 0

            state = sok.get_image('rgb_array')
            done = False
            while not done:
                steps += 1
                action = agent.act(process_frame(state), stochastic)
                # Same index -> env action mapping as the training loop:
                # 0-3 -> 1-4, 4-7 -> 9-12.
                if action < 4:
                    action += 1
                else:
                    action += 5
                state, reward, done, info = sok.step(action)

            # A 3 anywhere in room_state is used as the "solved" check
            # (presumably the code for a box on its target — confirm in soko_pap).
            if 3 in sok.room_state:
                num_solved += 1
                solved_in_steps[steps] += 1
    finally:
        # Always hand the agent back with its training epsilon; otherwise an
        # interrupted evaluation would leave training without exploration.
        agent.epsilon = current_epsilon

    print("*" * 30)
    print("Stochastic" if stochastic else "Deterministic")
    print("*" * 30)
    print("Solved: %d" % num_solved)
    print("=" * 30)
    print(solved_in_steps)
    print("*" * 30)

In [11]:
# Number of training episodes run by the training cell.
max_episodes = 10000
# Step cap handed to every training environment.
max_steps = 50

def init_sok(r):
    """Create a training environment for episode index ``r``.

    The ``random`` module is seeded with ``r + 100`` before the environment is
    constructed (presumably so that level generation is reproducible per
    episode — confirm against soko_pap), then the step cap is set to the
    module-level ``max_steps``.

    Args:
        r: Integer episode index.

    Returns:
        A ``PushAndPullSokobanEnv`` with a 7x7 room and one box.
    """
    random.seed(100 + r)
    env = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    env.set_maxsteps(max_steps)
    return env

In [None]:
# Training driver: runs max_episodes episodes, evaluating every 100 episodes.
agent = SOK_Agent()

# Length (in steps) of every episode that reached `done`.
steps_per_episode = []

for e in range(max_episodes):
    # Periodic evaluation (skipped before the first episode).
    if e % 100 == 0 and e > 0:
        test_agent(stochastic=False)
        
    print("Episode: %d" % (e+1))
    
    # init_sok seeds `random` with e+100 before building the level; test_agent
    # seeds with 0..99, so the two seed ranges only overlap for e < 0 (never).
    sok = init_sok(e)
    # Re-seed after level creation; this affects `random.randrange` in
    # agent.act but not the `np.random` calls, which stay unseeded.
    random.seed(e)
    
    state = process_frame(sok.get_image('rgb_array'))
    room_state = sok.room_state.copy() 
    # Distance map is computed once per episode from the initial grid.
    distances = get_distances(room_state)
    
    for step in range(sok.max_steps):
        action = agent.act(state)
        # Map the 8 network outputs onto env action ids: 0-3 -> 1-4, 4-7 -> 9-12.
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1) 
        else:
            next_state, reward, done, _ = sok.step(action+5)         
        
        next_state = process_frame(next_state)        
        # Live reference to the env grid; a copy is taken below before the next step.
        next_room_state = sok.room_state
        
        # Shaping reward is only added on non-terminal steps.
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)
        
        agent.remember(state, action, reward, next_state, done)
        
        state = next_state.copy() 
        room_state = next_room_state.copy()                
        
        # Train every `replay_rate` steps within the episode.
        if (step+1) % agent.replay_rate == 0:
            agent.replay()            
        
        if done:   
            steps_per_episode.append(step+1)
            
            # A 3 in the grid is used as the "solved" check; the whole solved
            # episode (step+1 transitions) is copied to the second buffer.
            if 3 in sok.room_state:                
                print("SOLVED! Episode %d Steps: %d Epsilon %.4f" % (e+1, step+1, agent.epsilon)) 
                agent.copy_to_prioritized_buffer(step+1)
            
            #agent.save("exp1_episode%d.h5" % (e))            
            break

Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
SOLVED! Episode 6 Steps: 31 Epsilon 0.8691
Episode: 7
Episode: 8
SOLVED! Episode 8 Steps: 7 Epsilon 0.8475
Episode: 9
Episode: 10
Episode: 11
Episode: 12
SOLVED! Episode 12 Steps: 10 Epsilon 0.7822
Episode: 13
Episode: 14
SOLVED! Episode 14 Steps: 15 Epsilon 0.7590
Episode: 15
SOLVED! Episode 15 Steps: 1 Epsilon 0.7590
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
SOLVED! Episode 21 Steps: 3 Epsilon 0.6696
Episode: 22
Episode: 23
SOLVED! Episode 23 Steps: 11 Epsilon 0.6498
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
SOLVED! Episode 29 Steps: 12 Epsilon 0.5704
Episode: 30
SOLVED! Episode 30 Steps: 15 Epsilon 0.5676
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
Episode: 42
SOLVED! Episode 42 Steps: 3 Epsilon 0.4308
Episode: 43
Episode: 44
Episode: 45
Episode: 46
Episode: 47
SOLVED! Epi

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 17
defaultdict(<class 'int'>, {3: 9, 2: 3, 1: 5})
******************************
Episode: 101
Episode: 102
Episode: 103
Episode: 104
Episode: 105
SOLVED! Episode 105 Steps: 3 Epsilon 0.2988
Episode: 106
Episode: 107
SOLVED! Episode 107 Steps: 2 Epsilon 0.2988
Episode: 108
SOLVED! Episode 108 Steps: 3 Epsilon 0.2988
Episode: 109
Episode: 110
Episode: 111
Episode: 112
Episode: 113
Episode: 114
Episode: 115
SOLVED! Episode 115 Steps: 1 Epsilon 0.2988
Episode: 116
Episode: 117
SOLVED! Episode 117 Steps: 3 Epsilon 0.2988
Episode: 118
Episode: 119
Episode: 120
Episode: 121
SOLVED! Episode 121 Steps: 3 Epsilon 0.2988
Episode: 122
Episode: 123
Episode: 124
Episode: 125
SOLVED! Episode 125 Steps: 3 Epsilon 0.2988
Episode: 126
SOLVED! Episode 126 Steps: 50 Epsilon 0.2988
Episode: 127
Episode: 128
Episode: 129
Episode: 130
SOLVED! Episode 130 Steps: 1 Epsilon 0.2988
Episode: 131
Episode: 132
Episode: 133
Episode:

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 24
defaultdict(<class 'int'>, {3: 11, 2: 4, 1: 9})
******************************
Episode: 201
SOLVED! Episode 201 Steps: 2 Epsilon 0.2988
Episode: 202
Episode: 203
Episode: 204
Episode: 205
Episode: 206
Episode: 207
Episode: 208
Episode: 209
Episode: 210
SOLVED! Episode 210 Steps: 2 Epsilon 0.2988
Episode: 211
Episode: 212
Episode: 213
Episode: 214
Episode: 215
Episode: 216
Episode: 217
SOLVED! Episode 217 Steps: 3 Epsilon 0.2988
Episode: 218
SOLVED! Episode 218 Steps: 45 Epsilon 0.2988
Episode: 219
SOLVED! Episode 219 Steps: 12 Epsilon 0.2988
Episode: 220
Episode: 221
SOLVED! Episode 221 Steps: 6 Epsilon 0.2988
Episode: 222
SOLVED! Episode 222 Steps: 1 Epsilon 0.2988
Episode: 223
Episode: 224
SOLVED! Episode 224 Steps: 1 Epsilon 0.2988
Episode: 225
Episode: 226
Episode: 227
Episode: 228
Episode: 229
Episode: 230
Episode: 231
Episode: 232
Episode: 233
Episode: 234
SOLVED! Episode 234 Steps: 1 Epsilon 

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 29
defaultdict(<class 'int'>, {3: 15, 1: 9, 2: 5})
******************************
Episode: 301
SOLVED! Episode 301 Steps: 1 Epsilon 0.2988
Episode: 302
Episode: 303
SOLVED! Episode 303 Steps: 13 Epsilon 0.2988
Episode: 304
Episode: 305
Episode: 306
Episode: 307
Episode: 308
Episode: 309
Episode: 310
Episode: 311
Episode: 312
Episode: 313
Episode: 314
Episode: 315
Episode: 316
Episode: 317
Episode: 318
Episode: 319
SOLVED! Episode 319 Steps: 2 Epsilon 0.2988
Episode: 320
Episode: 321
Episode: 322
Episode: 323
SOLVED! Episode 323 Steps: 14 Epsilon 0.2988
Episode: 324
Episode: 325
SOLVED! Episode 325 Steps: 1 Epsilon 0.2988
Episode: 326
Episode: 327
Episode: 328
Episode: 329
Episode: 330
Episode: 331
SOLVED! Episode 331 Steps: 2 Epsilon 0.2988
Episode: 332
Episode: 333
Episode: 334
Episode: 335
SOLVED! Episode 335 Steps: 6 Epsilon 0.2988
Episode: 336
SOLVED! Episode 336 Steps: 32 Epsilon 0.2988
Episode: 3

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 26
defaultdict(<class 'int'>, {3: 16, 1: 6, 2: 4})
******************************
Episode: 401
Episode: 402
Episode: 403
Episode: 404
SOLVED! Episode 404 Steps: 3 Epsilon 0.2988
Episode: 405
SOLVED! Episode 405 Steps: 3 Epsilon 0.2988
Episode: 406
SOLVED! Episode 406 Steps: 1 Epsilon 0.2988
Episode: 407
Episode: 408
Episode: 409
Episode: 410
Episode: 411
Episode: 412
Episode: 413
Episode: 414
SOLVED! Episode 414 Steps: 3 Epsilon 0.2988
Episode: 415
Episode: 416
Episode: 417
Episode: 418
Episode: 419
SOLVED! Episode 419 Steps: 3 Epsilon 0.2988
Episode: 420
Episode: 421
Episode: 422
Episode: 423
Episode: 424
SOLVED! Episode 424 Steps: 1 Epsilon 0.2988
Episode: 425
SOLVED! Episode 425 Steps: 50 Epsilon 0.2988
Episode: 426
Episode: 427
Episode: 428
Episode: 429
SOLVED! Episode 429 Steps: 2 Epsilon 0.2988
Episode: 430
Episode: 431
SOLVED! Episode 431 Steps: 45 Epsilon 0.2988
Episode: 432
Episode: 433
Episod

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 23
defaultdict(<class 'int'>, {3: 11, 1: 9, 2: 3})
******************************
Episode: 501
SOLVED! Episode 501 Steps: 2 Epsilon 0.2988
Episode: 502
Episode: 503
Episode: 504
Episode: 505
Episode: 506
Episode: 507
SOLVED! Episode 507 Steps: 14 Epsilon 0.2988
Episode: 508
Episode: 509
Episode: 510
Episode: 511
SOLVED! Episode 511 Steps: 32 Epsilon 0.2988
Episode: 512
SOLVED! Episode 512 Steps: 2 Epsilon 0.2988
Episode: 513
SOLVED! Episode 513 Steps: 1 Epsilon 0.2988
Episode: 514
Episode: 515
Episode: 516
Episode: 517
Episode: 518
Episode: 519
Episode: 520
Episode: 521
SOLVED! Episode 521 Steps: 1 Epsilon 0.2988
Episode: 522
Episode: 523
SOLVED! Episode 523 Steps: 3 Epsilon 0.2988
Episode: 524
SOLVED! Episode 524 Steps: 46 Epsilon 0.2988
Episode: 525
Episode: 526
Episode: 527
Episode: 528
Episode: 529
SOLVED! Episode 529 Steps: 8 Epsilon 0.2988
Episode: 530
Episode: 531
SOLVED! Episode 531 Steps: 1 Ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 27
defaultdict(<class 'int'>, {3: 12, 1: 10, 2: 5})
******************************
Episode: 601
SOLVED! Episode 601 Steps: 3 Epsilon 0.2988
Episode: 602
Episode: 603
SOLVED! Episode 603 Steps: 1 Epsilon 0.2988
Episode: 604
Episode: 605
Episode: 606
Episode: 607
Episode: 608
SOLVED! Episode 608 Steps: 1 Epsilon 0.2988
Episode: 609
SOLVED! Episode 609 Steps: 1 Epsilon 0.2988
Episode: 610
Episode: 611
Episode: 612
SOLVED! Episode 612 Steps: 1 Epsilon 0.2988
Episode: 613
Episode: 614
SOLVED! Episode 614 Steps: 1 Epsilon 0.2988
Episode: 615
Episode: 616
SOLVED! Episode 616 Steps: 34 Epsilon 0.2988
Episode: 617
Episode: 618
Episode: 619
Episode: 620
Episode: 621
SOLVED! Episode 621 Steps: 3 Epsilon 0.2988
Episode: 622
Episode: 623
Episode: 624
Episode: 625
Episode: 626
SOLVED! Episode 626 Steps: 2 Epsilon 0.2988
Episode: 627
Episode: 628
Episode: 629
SOLVED! Episode 629 Steps: 3 Epsilon 0.2988
Episode: 630
E

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 30
defaultdict(<class 'int'>, {3: 12, 1: 12, 2: 6})
******************************
Episode: 701
Episode: 702
Episode: 703
SOLVED! Episode 703 Steps: 6 Epsilon 0.2988
Episode: 704
Episode: 705
Episode: 706
SOLVED! Episode 706 Steps: 35 Epsilon 0.2988
Episode: 707
Episode: 708
Episode: 709
Episode: 710
Episode: 711
Episode: 712
SOLVED! Episode 712 Steps: 3 Epsilon 0.2988
Episode: 713
Episode: 714
Episode: 715
Episode: 716
Episode: 717
Episode: 718
Episode: 719
SOLVED! Episode 719 Steps: 3 Epsilon 0.2988
Episode: 720
Episode: 721
Episode: 722
SOLVED! Episode 722 Steps: 2 Epsilon 0.2988
Episode: 723
Episode: 724
Episode: 725
Episode: 726
Episode: 727
Episode: 728
Episode: 729
SOLVED! Episode 729 Steps: 30 Epsilon 0.2988
Episode: 730
Episode: 731
Episode: 732
Episode: 733
SOLVED! Episode 733 Steps: 3 Epsilon 0.2988
Episode: 734
SOLVED! Episode 734 Steps: 3 Epsilon 0.2988
Episode: 735
Episode: 736
Episode: 7

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 27
defaultdict(<class 'int'>, {3: 13, 2: 7, 1: 7})
******************************
Episode: 801
Episode: 802
Episode: 803
Episode: 804
Episode: 805
Episode: 806
SOLVED! Episode 806 Steps: 3 Epsilon 0.2988
Episode: 807
SOLVED! Episode 807 Steps: 3 Epsilon 0.2988
Episode: 808
Episode: 809
SOLVED! Episode 809 Steps: 11 Epsilon 0.2988
Episode: 810
Episode: 811
Episode: 812
Episode: 813
Episode: 814
SOLVED! Episode 814 Steps: 42 Epsilon 0.2988
Episode: 815
Episode: 816
Episode: 817
Episode: 818
Episode: 819
Episode: 820
Episode: 821
Episode: 822
Episode: 823
Episode: 824
SOLVED! Episode 824 Steps: 2 Epsilon 0.2988
Episode: 825
Episode: 826
Episode: 827
SOLVED! Episode 827 Steps: 13 Epsilon 0.2988
Episode: 828
SOLVED! Episode 828 Steps: 3 Epsilon 0.2988
Episode: 829
SOLVED! Episode 829 Steps: 2 Epsilon 0.2988
Episode: 830
Episode: 831
Episode: 832
Episode: 833
SOLVED! Episode 833 Steps: 7 Epsilon 0.2988
Episo

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


******************************
Deterministic
******************************
Solved: 29
defaultdict(<class 'int'>, {3: 15, 2: 6, 1: 8})
******************************
Episode: 901
Episode: 902
Episode: 903
SOLVED! Episode 903 Steps: 3 Epsilon 0.2988
Episode: 904
Episode: 905
Episode: 906
Episode: 907
Episode: 908
Episode: 909
Episode: 910
SOLVED! Episode 910 Steps: 33 Epsilon 0.2988
Episode: 911
Episode: 912
SOLVED! Episode 912 Steps: 2 Epsilon 0.2988
Episode: 913
Episode: 914
Episode: 915
Episode: 916
Episode: 917
Episode: 918
Episode: 919
Episode: 920
Episode: 921
SOLVED! Episode 921 Steps: 2 Epsilon 0.2988
Episode: 922
SOLVED! Episode 922 Steps: 17 Epsilon 0.2988
Episode: 923
Episode: 924
Episode: 925
Episode: 926
Episode: 927
Episode: 928
Episode: 929
Episode: 930
Episode: 931
Episode: 932
Episode: 933
Episode: 934
Episode: 935
Episode: 936
Episode: 937
Episode: 938
SOLVED! Episode 938 Steps: 2 Epsilon 0.2988
Episode: 939
Episode: 940
Episode: 941
Episode: 942
Episode: 943
Episode: