# Final Project - Reinforcement Learning
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import os
import random
import time

import numpy as np
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
# NOTE: star import; the original import order is kept on purpose so that
# name shadowing between this module and the imports around it is unchanged.
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embed an mp4 file in the notebook.

    The whole file is read into memory, base64-encoded and placed in a
    ``data:`` URI inside a ``<video>`` tag, so the returned object carries
    the video bytes itself.

    Args:
        filename: path of the mp4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<video>`` tag.
    """
    # Context manager so the handle is closed even if read() raises.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    """Breadth-first step distance from the target cell to every reachable cell.

    Args:
        room_state: 2-D array of cell codes. Only two codes matter here:
            ``0`` is impassable and ``2`` marks the target; every other
            value is treated as walkable.

    Returns:
        Float array with the same shape as ``room_state``. Each reachable
        cell holds its 4-neighbour step distance to the target. The target
        itself, impassable cells and unreachable cells all hold 0.

    Raises:
        ValueError: if no cell of ``room_state`` equals 2.
    """
    room_state = np.asarray(room_state)

    target_cells = np.argwhere(room_state == 2)
    if len(target_cells) == 0:
        raise ValueError("room_state contains no target cell (value 2)")
    # With several targets the last one in row-major order is used, which is
    # what the previous nested-loop scan ended up with.
    target = tuple(int(v) for v in target_cells[-1])

    n_rows, n_cols = room_state.shape[0], room_state.shape[1]
    distances = np.zeros(shape=room_state.shape)
    visited_cells = {target}
    cell_queue = deque([target])

    while cell_queue:
        row, col = cell_queue.popleft()
        distance = distances[row][col]
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            next_cell = (row + d_row, col + d_col)
            # Stay on the board: a negative index would silently wrap to the
            # opposite edge and a too-large one would raise IndexError.
            if not (0 <= next_cell[0] < n_rows and 0 <= next_cell[1] < n_cols):
                continue
            if room_state[next_cell[0]][next_cell[1]] != 0 and next_cell not in visited_cells:
                distances[next_cell[0]][next_cell[1]] = distance + 1
                visited_cells.add(next_cell)
                cell_queue.append(next_cell)

    return distances

def calc_distances(room_state, distances):
    """Find the mover and the box, and look up the box's distance value.

    Args:
        room_state: 2-D array of cell codes; ``4`` is read as the box and
            ``5`` as the mover.
        distances: 2-D array indexed like ``room_state`` (for instance the
            output of ``get_distances``).

    Returns:
        ``(mover, box, box_distance)`` where ``mover`` and ``box`` are
        ``(row, col)`` tuples and ``box_distance`` is ``distances`` at the
        box position. ``mover`` is ``None`` when no cell equals 5.

    Raises:
        TypeError: when no cell equals 4, because the box position is then
            ``None`` and cannot be indexed.
    """
    mover = box = None
    # Row-major scan; if a code occurs more than once the last hit wins.
    for cell in np.ndindex(room_state.shape[0], room_state.shape[1]):
        code = room_state[cell[0]][cell[1]]
        if code == 4:
            box = cell
        elif code == 5:
            mover = cell

    box_distance = distances[box[0]][box[1]]
    return mover, box, box_distance

def box2target_change_reward(room_state, next_room_state, distances):
    """Shaping reward for one transition, based on box and mover movement.

    Args:
        room_state: board before the step.
        next_room_state: board after the step.
        distances: per-cell distance-to-target array, passed through to
            ``calc_distances``.

    Returns:
        ``-0.5`` when the two boards are equal. Otherwise the sum of two
        terms:
          * box term: ``+1.0`` if the box's distance value decreased,
            ``-1.0`` if it increased, ``0.0`` if unchanged;
          * mover term: ``+0.25`` if the straight-line mover-to-box distance
            shrank from a value of at least 2, ``-0.25`` if it grew to a
            value of at least 2, ``0.0`` otherwise.
    """
    # Nothing on the board changed, so the step is penalised outright.
    if np.array_equal(room_state, next_room_state):
        return -0.5

    def _euclid(a, b):
        # Straight-line distance between two (row, col) cells.
        return np.sqrt((a[0]-b[0])**2 + (a[1]-b[1])**2)

    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)

    if n_t2b < t2b:
        box_term = 1.0
    elif n_t2b > t2b:
        box_term = -1.0
    else:
        box_term = 0.0

    m2b = _euclid(mover, box)
    n_m2b = _euclid(n_mover, n_box)

    if n_m2b < m2b and m2b >= 2:
        mover_term = 0.25
    elif n_m2b > m2b and n_m2b >= 2:
        mover_term = -0.25
    else:
        mover_term = 0.0

    return box_term + mover_term

# Solution

In [13]:
class SOK_Agent:
    """DQN agent with a target network, two replay buffers and rotation augmentation.

    The agent keeps an online Q-network (``model``) and a slowly tracking
    copy (``target_model``). Transitions go into ``replay_buffer``; the
    caller can additionally copy the most recent ones into
    ``prioritized_replay_buffer``. Each training batch is drawn half from
    each buffer and every sampled transition is expanded into its four
    90-degree rotations.
    """

    def __init__(self):
        # Construct DQN models
        self.state_size = (5, 5, 1)
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8

        # Replay buffers
        self.replay_buffer = deque(maxlen=1500)
        self.prioritized_replay_buffer = deque(maxlen=500)
        self.prioritized_replay_batch = 20

        # Hyperparameters
        self.gamma = 0.9
        self.epsilon = 1.0
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.99995
        self.replay_rate = 10
        self.update_beta = 0.99

        self.verbosity = 100

        # Action index -> index of the equivalent action after one np.rot90
        # step of the board, as applied in replay().
        # NOTE(review): the two groups 0-3 and 4-7 are permuted alike, which
        # suggests two families of four directional actions - confirm the
        # direction order against the environment's action table.
        self.action_rotation_map = {
            0: 2,
            1: 3,
            2: 1,
            3: 0,
            4: 6,
            5: 7,
            6: 5,
            7: 4
        }

    def _build_model(self):
        """Build and compile the Q-network: one 3x3 conv layer, then dense 512-64-``action_size``."""
        model = Sequential()
        model.add(Conv2D(64, (3, 3), input_shape=self.state_size))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the regular replay buffer."""
        self.replay_buffer.append([state, action, reward, next_state, done])

    def copy_to_prioritized_buffer(self, n):
        """Copy the ``n`` most recent transitions into the prioritized buffer.

        The entries are shared (not deep-copied) between the two buffers.
        ``n`` must not exceed the current length of ``replay_buffer``.
        """
        for i in range(n):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])

    def act(self, state):
        """Pick an action epsilon-greedily.

        With probability ``epsilon`` a uniformly random action index is
        returned; otherwise the argmax of the online network's prediction
        for ``state`` (a batch holding a single observation).
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self):
        """Run one training step on a rotation-augmented minibatch.

        Half of the batch comes from the prioritized buffer and the rest
        from the regular buffer. When the prioritized buffer is still too
        small, the shortfall is taken from the regular buffer. When the
        regular buffer cannot supply its share either, the call is a no-op.

        Targets follow the double-DQN scheme: the online network selects the
        best next action and the target network supplies its value.
        """
        n_prioritized = min(self.batch_size // 2, len(self.prioritized_replay_buffer))
        n_regular = self.batch_size - n_prioritized
        if len(self.replay_buffer) < n_regular:
            # Not enough experience collected yet to form a batch.
            return

        minibatch = random.sample(self.replay_buffer, n_regular)
        minibatch.extend(random.sample(self.prioritized_replay_buffer, n_prioritized))

        n_rows = len(minibatch) * 4
        states = np.zeros((n_rows, self.state_size[0], self.state_size[1]))
        actions = np.zeros(n_rows, dtype=int)
        rewards = np.zeros(n_rows)
        next_states = np.zeros((n_rows, self.state_size[0], self.state_size[1]))
        statuses = np.zeros(n_rows)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            for rot in range(4):
                ind = i*4+rot
                if rot != 0:
                    # Rotations accumulate: each pass turns the previous
                    # result by a further 90 degrees and remaps the action.
                    state = np.rot90(state, axes=(1,2))
                    next_state = np.rot90(next_state, axes=(1,2))
                    action = self.action_rotation_map.get(action)

                states[ind] = state.copy()
                actions[ind] = action
                rewards[ind] = reward
                next_states[ind] = next_state.copy()
                statuses[ind] = 1 if done else 0

        # Start from the current predictions so that only the taken action's
        # entry contributes to the loss.
        targets = self.model.predict(states, verbose=0)
        max_actions = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_rewards = self.target_model.predict(next_states, verbose=0)

        rows = zip(actions, rewards, next_rewards, max_actions, statuses)
        for ind, (action, reward, next_reward, max_action, done) in enumerate(rows):
            if not done:
                reward += self.gamma * next_reward[max_action]
            targets[ind][action] = reward

        self.model.fit(states, targets, epochs=1, verbose=0)

    def update_epsilon(self):
        """Decay ``epsilon`` multiplicatively, never going below ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def update_target_model(self):
        """Move the target network towards the online network.

        Each weight tensor becomes
        ``update_beta * target + (1 - update_beta) * online``.
        """
        model_w = self.model.get_weights()
        target_model_w = self.target_model.get_weights()
        updated_target_model_w = [
            self.update_beta*target_w + (1-self.update_beta)*online_w
            for target_w, online_w in zip(target_model_w, model_w)
        ]
        self.target_model.set_weights(updated_target_model_w)

    def load(self, name):
        """Load online-network weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save online-network weights to ``name``."""
        self.model.save_weights(name)

In [14]:
def process_frame(frame):
    """Reduce a rendered frame to a 5x5 grid scaled by 1/255, with a leading batch axis.

    Args:
        frame: image array indexed ``[row, col, channel]`` with at least 96
            rows and 96 columns. Only channel 0 of rows/cols 16..95 is used.
            (Presumably a render of the 7x7 board with 16-pixel tiles, with
            the outer ring of tiles cropped away - confirm against the
            environment.)

    Returns:
        Array of shape ``(1, 5, 5)``: each entry is the maximum of one
        16x16 pixel tile of the crop, divided by 255.
    """
    crop = frame[16:96, 16:96, 0]
    # View the 80x80 crop as a 5x5 grid of 16x16 tiles and max-pool each tile.
    tiles = crop.reshape(5, 16, 5, 16)
    pooled = tiles.max(axis=(1, 3)) / 255
    return pooled[np.newaxis, ...]

## Training

In [19]:
# Upper bound on training episodes and on steps within one episode.
max_episodes = 25000
max_steps = 100


def init_sok(r):
    """Create a fresh 7x7, single-box environment for episode index ``r``.

    Python's ``random`` module is seeded with ``r % 100`` before the
    environment is built, so indices that differ by a multiple of 100 seed
    it identically. Whether that fully fixes the generated puzzle depends on
    which random sources the environment uses - not visible here.

    Returns:
        The new ``PushAndPullSokobanEnv`` with its step limit set to
        ``max_steps``.
    """
    random.seed(r % 100)
    env = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    env.set_maxsteps(max_steps)
    return env

In [20]:
agent = SOK_Agent()

# Weights are checkpointed after every finished episode; make sure the
# destination folder exists before the first save.
os.makedirs("saved_models", exist_ok=True)

# Training stops early after this many consecutive solved episodes.
continuous_successes_goal = 100
continuous_successes = 0

total_steps = 0
solved_puzzles = []       # solved count per block of 100 episodes
steps_per_episode = []

puzzle_run = 0            # solved count inside the current block of 100 episodes
for e in range(max_episodes):
    if continuous_successes >= continuous_successes_goal:
        print("Agent training finished!")
        break

    print("Episode: %d" % (e))

    sok = init_sok(e)
    state = process_frame(sok.get_image('rgb_array'))
    # init_sok seeded with e % 100; reseed with e for the rest of the episode.
    random.seed(e)

    room_state = sok.room_state.copy()
    distances = get_distances(room_state)

    for step in range(sok.max_steps):
        total_steps += 1
        # Late in training (epsilon < 0.1), steps from the 21st onward are
        # taken with epsilon temporarily raised to 0.9; the agent's own
        # epsilon is restored right after the action is chosen.
        current_epsilon = agent.epsilon
        if current_epsilon < 0.1 and step >= 20:
            agent.epsilon = 0.9
        action = agent.act(state)
        agent.epsilon = current_epsilon

        # Agent actions 0-3 map to env codes 1-4, actions 4-7 to env codes 9-12.
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1)
        else:
            next_state, reward, done, _ = sok.step(action+5)

        next_state = process_frame(next_state)
        next_room_state = sok.room_state

        # Shaping is only added on non-terminal steps.
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)

        agent.remember(state, action, reward, next_state, done)

        state = next_state.copy()
        room_state = next_room_state.copy()

        # The first 100 episodes only collect experience; afterwards train
        # once every agent.replay_rate environment steps.
        if e >= 100:
            if (total_steps+1) % agent.replay_rate == 0:
                agent.replay()
                agent.update_target_model()
                agent.update_epsilon()

        if done:
            # A cell with code 3 is taken as the "solved" marker.
            if 3 in sok.room_state:
                puzzle_run += 1
                continuous_successes += 1
                print("SOLVED! Steps: %d {%d}" % (step+1, puzzle_run))
                agent.copy_to_prioritized_buffer(min(agent.prioritized_replay_batch, step+1))
            else:
                continuous_successes = 0

            if (e+1) % 100 == 0:
                solved_puzzles.append(puzzle_run)
                print("*"*50)
                print("%s" % solved_puzzles[-10:])
                print("*"*50)
                puzzle_run = 0

            steps_per_episode.append(step+1)
            # os.path.join instead of a hard-coded backslash: "\e" is an
            # invalid escape sequence and the separator was Windows-only.
            agent.save(os.path.join("saved_models", "episode%d.h5" % (e)))

            break

Episode: 0
Episode: 1
Episode: 2
Episode: 3
SOLVED! Steps: 54 {1}
Episode: 4
SOLVED! Steps: 1 {2}
Episode: 5
SOLVED! Steps: 28 {3}
Episode: 6
Episode: 7
SOLVED! Steps: 25 {4}
Episode: 8
Episode: 9
SOLVED! Steps: 74 {5}
Episode: 10
Episode: 11
SOLVED! Steps: 27 {6}
Episode: 12
Episode: 13
Episode: 14
Episode: 15
SOLVED! Steps: 54 {7}
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
SOLVED! Steps: 12 {8}
Episode: 30
SOLVED! Steps: 59 {9}
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
SOLVED! Steps: 11 {10}
Episode: 38
Episode: 39
SOLVED! Steps: 12 {11}
Episode: 40
Episode: 41
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
SOLVED! Steps: 43 {12}
Episode: 47
Episode: 48
Episode: 49
Episode: 50
Episode: 51
Episode: 52
SOLVED! Steps: 2 {13}
Episode: 53
SOLVED! Steps: 6 {14}
Episode: 54
SOLVED! Steps: 22 {15}
Episode: 55
SOLVE

Episode: 388
Episode: 389
Episode: 390
SOLVED! Steps: 13 {33}
Episode: 391
SOLVED! Steps: 83 {34}
Episode: 392
Episode: 393
SOLVED! Steps: 8 {35}
Episode: 394
Episode: 395
Episode: 396
SOLVED! Steps: 24 {36}
Episode: 397
Episode: 398
SOLVED! Steps: 17 {37}
Episode: 399
**************************************************
[30, 29, 40, 37]
**************************************************
Episode: 400
SOLVED! Steps: 38 {1}
Episode: 401
SOLVED! Steps: 26 {2}
Episode: 402
Episode: 403
SOLVED! Steps: 17 {3}
Episode: 404
SOLVED! Steps: 38 {4}
Episode: 405
Episode: 406
Episode: 407
SOLVED! Steps: 38 {5}
Episode: 408
SOLVED! Steps: 14 {6}
Episode: 409
SOLVED! Steps: 82 {7}
Episode: 410
SOLVED! Steps: 35 {8}
Episode: 411
SOLVED! Steps: 4 {9}
Episode: 412
SOLVED! Steps: 7 {10}
Episode: 413
Episode: 414
SOLVED! Steps: 15 {11}
Episode: 415
SOLVED! Steps: 2 {12}
Episode: 416
SOLVED! Steps: 2 {13}
Episode: 417
SOLVED! Steps: 53 {14}
Episode: 418
Episode: 419
Episode: 420
SOLVED! Steps: 1 {15}
Episode

Episode: 726
Episode: 727
Episode: 728
Episode: 729
SOLVED! Steps: 11 {12}
Episode: 730
Episode: 731
Episode: 732
Episode: 733
Episode: 734
Episode: 735
Episode: 736
SOLVED! Steps: 12 {13}
Episode: 737
SOLVED! Steps: 1 {14}
Episode: 738
Episode: 739
SOLVED! Steps: 6 {15}
Episode: 740
SOLVED! Steps: 3 {16}
Episode: 741
Episode: 742
Episode: 743
SOLVED! Steps: 2 {17}
Episode: 744
Episode: 745
SOLVED! Steps: 51 {18}
Episode: 746
SOLVED! Steps: 2 {19}
Episode: 747
Episode: 748
Episode: 749
Episode: 750
Episode: 751
Episode: 752
SOLVED! Steps: 1 {20}
Episode: 753
SOLVED! Steps: 1 {21}
Episode: 754
Episode: 755
SOLVED! Steps: 1 {22}
Episode: 756
Episode: 757
SOLVED! Steps: 99 {23}
Episode: 758
Episode: 759
SOLVED! Steps: 75 {24}
Episode: 760
Episode: 761
Episode: 762
Episode: 763
SOLVED! Steps: 1 {25}
Episode: 764
Episode: 765
Episode: 766
Episode: 767
SOLVED! Steps: 76 {26}
Episode: 768
Episode: 769
Episode: 770
SOLVED! Steps: 3 {27}
Episode: 771
SOLVED! Steps: 7 {28}
Episode: 772
SOLVED! S

SOLVED! Steps: 44 {29}
Episode: 1058
SOLVED! Steps: 99 {30}
Episode: 1059
SOLVED! Steps: 53 {31}
Episode: 1060
Episode: 1061
Episode: 1062
Episode: 1063
SOLVED! Steps: 1 {32}
Episode: 1064
SOLVED! Steps: 8 {33}
Episode: 1065
Episode: 1066
Episode: 1067
Episode: 1068
Episode: 1069
Episode: 1070
SOLVED! Steps: 3 {34}
Episode: 1071
Episode: 1072
SOLVED! Steps: 2 {35}
Episode: 1073
SOLVED! Steps: 6 {36}
Episode: 1074
SOLVED! Steps: 3 {37}
Episode: 1075
SOLVED! Steps: 3 {38}
Episode: 1076
SOLVED! Steps: 1 {39}
Episode: 1077
Episode: 1078
Episode: 1079
Episode: 1080
SOLVED! Steps: 7 {40}
Episode: 1081
Episode: 1082
Episode: 1083
Episode: 1084
SOLVED! Steps: 6 {41}
Episode: 1085
Episode: 1086
Episode: 1087
SOLVED! Steps: 1 {42}
Episode: 1088
SOLVED! Steps: 16 {43}
Episode: 1089
Episode: 1090
SOLVED! Steps: 1 {44}
Episode: 1091
Episode: 1092
SOLVED! Steps: 3 {45}
Episode: 1093
Episode: 1094
Episode: 1095
Episode: 1096
SOLVED! Steps: 8 {46}
Episode: 1097
Episode: 1098
SOLVED! Steps: 1 {47}
Epis

SOLVED! Steps: 3 {31}
Episode: 1371
SOLVED! Steps: 7 {32}
Episode: 1372
SOLVED! Steps: 50 {33}
Episode: 1373
SOLVED! Steps: 38 {34}
Episode: 1374
SOLVED! Steps: 38 {35}
Episode: 1375
SOLVED! Steps: 3 {36}
Episode: 1376
SOLVED! Steps: 1 {37}
Episode: 1377
Episode: 1378
Episode: 1379
SOLVED! Steps: 23 {38}
Episode: 1380
SOLVED! Steps: 73 {39}
Episode: 1381
Episode: 1382
Episode: 1383
SOLVED! Steps: 6 {40}
Episode: 1384
SOLVED! Steps: 52 {41}
Episode: 1385
Episode: 1386
Episode: 1387
SOLVED! Steps: 1 {42}
Episode: 1388
SOLVED! Steps: 7 {43}
Episode: 1389
Episode: 1390
SOLVED! Steps: 1 {44}
Episode: 1391
SOLVED! Steps: 3 {45}
Episode: 1392
SOLVED! Steps: 3 {46}
Episode: 1393
SOLVED! Steps: 7 {47}
Episode: 1394
Episode: 1395
Episode: 1396
SOLVED! Steps: 13 {48}
Episode: 1397
Episode: 1398
SOLVED! Steps: 1 {49}
Episode: 1399
SOLVED! Steps: 3 {50}
**************************************************
[41, 43, 49, 43, 46, 48, 48, 51, 52, 50]
**************************************************
Epis

SOLVED! Steps: 89 {34}
Episode: 1672
SOLVED! Steps: 2 {35}
Episode: 1673
SOLVED! Steps: 6 {36}
Episode: 1674
SOLVED! Steps: 3 {37}
Episode: 1675
SOLVED! Steps: 3 {38}
Episode: 1676
SOLVED! Steps: 1 {39}
Episode: 1677
Episode: 1678
Episode: 1679
SOLVED! Steps: 82 {40}
Episode: 1680
Episode: 1681
Episode: 1682
SOLVED! Steps: 29 {41}
Episode: 1683
SOLVED! Steps: 3 {42}
Episode: 1684
SOLVED! Steps: 12 {43}
Episode: 1685
Episode: 1686
Episode: 1687
SOLVED! Steps: 1 {44}
Episode: 1688
SOLVED! Steps: 7 {45}
Episode: 1689
Episode: 1690
Episode: 1691
SOLVED! Steps: 3 {46}
Episode: 1692
SOLVED! Steps: 13 {47}
Episode: 1693
SOLVED! Steps: 18 {48}
Episode: 1694
Episode: 1695
Episode: 1696
SOLVED! Steps: 6 {49}
Episode: 1697
Episode: 1698
SOLVED! Steps: 1 {50}
Episode: 1699
SOLVED! Steps: 3 {51}
**************************************************
[43, 46, 48, 48, 51, 52, 50, 56, 53, 51]
**************************************************
Episode: 1700
SOLVED! Steps: 3 {1}
Episode: 1701
SOLVED! Steps:

Episode: 1974
SOLVED! Steps: 3 {41}
Episode: 1975
SOLVED! Steps: 3 {42}
Episode: 1976
SOLVED! Steps: 1 {43}
Episode: 1977
Episode: 1978
Episode: 1979
Episode: 1980
SOLVED! Steps: 4 {44}
Episode: 1981
Episode: 1982
Episode: 1983
SOLVED! Steps: 1 {45}
Episode: 1984
SOLVED! Steps: 62 {46}
Episode: 1985
Episode: 1986
Episode: 1987
SOLVED! Steps: 1 {47}
Episode: 1988
SOLVED! Steps: 11 {48}
Episode: 1989
SOLVED! Steps: 7 {49}
Episode: 1990
SOLVED! Steps: 35 {50}
Episode: 1991
SOLVED! Steps: 3 {51}
Episode: 1992
SOLVED! Steps: 3 {52}
Episode: 1993
Episode: 1994
SOLVED! Steps: 49 {53}
Episode: 1995
Episode: 1996
Episode: 1997
SOLVED! Steps: 7 {54}
Episode: 1998
SOLVED! Steps: 1 {55}
Episode: 1999
SOLVED! Steps: 3 {56}
**************************************************
[48, 51, 52, 50, 56, 53, 51, 54, 50, 56]
**************************************************
Episode: 2000
SOLVED! Steps: 3 {1}
Episode: 2001
SOLVED! Steps: 6 {2}
Episode: 2002
Episode: 2003
SOLVED! Steps: 1 {3}
Episode: 2004
SOLV

SOLVED! Steps: 3 {39}
Episode: 2265
Episode: 2266
Episode: 2267
Episode: 2268
Episode: 2269
Episode: 2270
SOLVED! Steps: 3 {40}
Episode: 2271
SOLVED! Steps: 2 {41}
Episode: 2272
SOLVED! Steps: 2 {42}
Episode: 2273
SOLVED! Steps: 2 {43}
Episode: 2274
SOLVED! Steps: 3 {44}
Episode: 2275
SOLVED! Steps: 3 {45}
Episode: 2276
SOLVED! Steps: 1 {46}
Episode: 2277
Episode: 2278
Episode: 2279
SOLVED! Steps: 35 {47}
Episode: 2280
SOLVED! Steps: 2 {48}
Episode: 2281
Episode: 2282
Episode: 2283
SOLVED! Steps: 1 {49}
Episode: 2284
SOLVED! Steps: 2 {50}
Episode: 2285
Episode: 2286
Episode: 2287
SOLVED! Steps: 1 {51}
Episode: 2288
SOLVED! Steps: 7 {52}
Episode: 2289
SOLVED! Steps: 4 {53}
Episode: 2290
SOLVED! Steps: 1 {54}
Episode: 2291
SOLVED! Steps: 3 {55}
Episode: 2292
SOLVED! Steps: 87 {56}
Episode: 2293
SOLVED! Steps: 16 {57}
Episode: 2294
SOLVED! Steps: 49 {58}
Episode: 2295
Episode: 2296
SOLVED! Steps: 3 {59}
Episode: 2297
SOLVED! Steps: 4 {60}
Episode: 2298
SOLVED! Steps: 1 {61}
Episode: 2299


SOLVED! Steps: 1 {31}
Episode: 2556
SOLVED! Steps: 1 {32}
Episode: 2557
SOLVED! Steps: 4 {33}
Episode: 2558
Episode: 2559
SOLVED! Steps: 20 {34}
Episode: 2560
Episode: 2561
SOLVED! Steps: 3 {35}
Episode: 2562
SOLVED! Steps: 73 {36}
Episode: 2563
SOLVED! Steps: 1 {37}
Episode: 2564
SOLVED! Steps: 3 {38}
Episode: 2565
Episode: 2566
Episode: 2567
Episode: 2568
Episode: 2569
Episode: 2570
SOLVED! Steps: 3 {39}
Episode: 2571
SOLVED! Steps: 2 {40}
Episode: 2572
SOLVED! Steps: 2 {41}
Episode: 2573
SOLVED! Steps: 2 {42}
Episode: 2574
SOLVED! Steps: 3 {43}
Episode: 2575
SOLVED! Steps: 3 {44}
Episode: 2576
SOLVED! Steps: 1 {45}
Episode: 2577
Episode: 2578
Episode: 2579
Episode: 2580
Episode: 2581
Episode: 2582
Episode: 2583
SOLVED! Steps: 1 {46}
Episode: 2584
SOLVED! Steps: 2 {47}
Episode: 2585
Episode: 2586
Episode: 2587
SOLVED! Steps: 1 {48}
Episode: 2588
SOLVED! Steps: 4 {49}
Episode: 2589
SOLVED! Steps: 6 {50}
Episode: 2590
SOLVED! Steps: 1 {51}
Episode: 2591
SOLVED! Steps: 3 {52}
Episode: 2

Episode: 2843
SOLVED! Steps: 2 {24}
Episode: 2844
Episode: 2845
Episode: 2846
SOLVED! Steps: 2 {25}
Episode: 2847
Episode: 2848
Episode: 2849
Episode: 2850
Episode: 2851
SOLVED! Steps: 1 {26}
Episode: 2852
SOLVED! Steps: 1 {27}
Episode: 2853
SOLVED! Steps: 1 {28}
Episode: 2854
Episode: 2855
SOLVED! Steps: 1 {29}
Episode: 2856
SOLVED! Steps: 1 {30}
Episode: 2857
Episode: 2858
Episode: 2859
Episode: 2860
Episode: 2861
SOLVED! Steps: 3 {31}
Episode: 2862
Episode: 2863
SOLVED! Steps: 1 {32}
Episode: 2864
SOLVED! Steps: 3 {33}
Episode: 2865
Episode: 2866
Episode: 2867
Episode: 2868
Episode: 2869
Episode: 2870
SOLVED! Steps: 3 {34}
Episode: 2871
SOLVED! Steps: 2 {35}
Episode: 2872
SOLVED! Steps: 2 {36}
Episode: 2873
SOLVED! Steps: 2 {37}
Episode: 2874
SOLVED! Steps: 3 {38}
Episode: 2875
SOLVED! Steps: 3 {39}
Episode: 2876
SOLVED! Steps: 1 {40}
Episode: 2877
Episode: 2878
Episode: 2879
SOLVED! Steps: 51 {41}
Episode: 2880
SOLVED! Steps: 2 {42}
Episode: 2881
Episode: 2882
Episode: 2883
SOLVED!

Episode: 3138
Episode: 3139
SOLVED! Steps: 1 {23}
Episode: 3140
SOLVED! Steps: 3 {24}
Episode: 3141
Episode: 3142
Episode: 3143
SOLVED! Steps: 2 {25}
Episode: 3144
Episode: 3145
Episode: 3146
SOLVED! Steps: 2 {26}
Episode: 3147
Episode: 3148
Episode: 3149
Episode: 3150
Episode: 3151
SOLVED! Steps: 1 {27}
Episode: 3152
SOLVED! Steps: 1 {28}
Episode: 3153
SOLVED! Steps: 1 {29}
Episode: 3154
Episode: 3155
SOLVED! Steps: 1 {30}
Episode: 3156
SOLVED! Steps: 1 {31}
Episode: 3157
SOLVED! Steps: 4 {32}
Episode: 3158
Episode: 3159
SOLVED! Steps: 90 {33}
Episode: 3160
Episode: 3161
SOLVED! Steps: 3 {34}
Episode: 3162
Episode: 3163
SOLVED! Steps: 1 {35}
Episode: 3164
SOLVED! Steps: 3 {36}
Episode: 3165
Episode: 3166
Episode: 3167
Episode: 3168
Episode: 3169
SOLVED! Steps: 15 {37}
Episode: 3170
SOLVED! Steps: 3 {38}
Episode: 3171
SOLVED! Steps: 2 {39}
Episode: 3172
SOLVED! Steps: 2 {40}
Episode: 3173
SOLVED! Steps: 2 {41}
Episode: 3174
SOLVED! Steps: 3 {42}
Episode: 3175
SOLVED! Steps: 3 {43}
Epis

Episode: 3427
Episode: 3428
Episode: 3429
SOLVED! Steps: 1 {18}
Episode: 3430
SOLVED! Steps: 3 {19}
Episode: 3431
Episode: 3432
Episode: 3433
SOLVED! Steps: 80 {20}
Episode: 3434
Episode: 3435
SOLVED! Steps: 2 {21}
Episode: 3436
SOLVED! Steps: 3 {22}
Episode: 3437
SOLVED! Steps: 1 {23}
Episode: 3438
Episode: 3439
SOLVED! Steps: 1 {24}
Episode: 3440
SOLVED! Steps: 3 {25}
Episode: 3441
Episode: 3442
Episode: 3443
SOLVED! Steps: 2 {26}
Episode: 3444
SOLVED! Steps: 67 {27}
Episode: 3445
SOLVED! Steps: 61 {28}
Episode: 3446
SOLVED! Steps: 2 {29}
Episode: 3447
Episode: 3448
Episode: 3449
Episode: 3450
SOLVED! Steps: 34 {30}
Episode: 3451
SOLVED! Steps: 1 {31}
Episode: 3452
SOLVED! Steps: 1 {32}
Episode: 3453
SOLVED! Steps: 1 {33}
Episode: 3454
Episode: 3455
SOLVED! Steps: 1 {34}
Episode: 3456
SOLVED! Steps: 1 {35}
Episode: 3457
SOLVED! Steps: 3 {36}
Episode: 3458
SOLVED! Steps: 3 {37}
Episode: 3459
Episode: 3460
Episode: 3461
SOLVED! Steps: 3 {38}
Episode: 3462
SOLVED! Steps: 75 {39}
Episode

Episode: 3716
SOLVED! Steps: 1 {11}
Episode: 3717
SOLVED! Steps: 1 {12}
Episode: 3718
Episode: 3719
Episode: 3720
SOLVED! Steps: 1 {13}
Episode: 3721
SOLVED! Steps: 3 {14}
Episode: 3722
SOLVED! Steps: 1 {15}
Episode: 3723
SOLVED! Steps: 58 {16}
Episode: 3724
Episode: 3725
SOLVED! Steps: 3 {17}
Episode: 3726
Episode: 3727
Episode: 3728
Episode: 3729
SOLVED! Steps: 1 {18}
Episode: 3730
SOLVED! Steps: 3 {19}
Episode: 3731
Episode: 3732
Episode: 3733
Episode: 3734
Episode: 3735
SOLVED! Steps: 2 {20}
Episode: 3736
SOLVED! Steps: 3 {21}
Episode: 3737
SOLVED! Steps: 1 {22}
Episode: 3738
Episode: 3739
SOLVED! Steps: 25 {23}
Episode: 3740
SOLVED! Steps: 3 {24}
Episode: 3741
Episode: 3742
Episode: 3743
SOLVED! Steps: 2 {25}
Episode: 3744
Episode: 3745
Episode: 3746
SOLVED! Steps: 17 {26}
Episode: 3747
Episode: 3748
Episode: 3749
Episode: 3750
Episode: 3751
SOLVED! Steps: 1 {27}
Episode: 3752
SOLVED! Steps: 1 {28}
Episode: 3753
SOLVED! Steps: 1 {29}
Episode: 3754
Episode: 3755
SOLVED! Steps: 1 {3

Episode: 4006
Episode: 4007
SOLVED! Steps: 16 {5}
Episode: 4008
SOLVED! Steps: 2 {6}
Episode: 4009
Episode: 4010
SOLVED! Steps: 3 {7}
Episode: 4011
SOLVED! Steps: 2 {8}
Episode: 4012
SOLVED! Steps: 2 {9}
Episode: 4013
Episode: 4014
Episode: 4015
SOLVED! Steps: 1 {10}
Episode: 4016
SOLVED! Steps: 1 {11}
Episode: 4017
SOLVED! Steps: 1 {12}
Episode: 4018
Episode: 4019
Episode: 4020
SOLVED! Steps: 1 {13}
Episode: 4021
SOLVED! Steps: 3 {14}
Episode: 4022
SOLVED! Steps: 1 {15}
Episode: 4023
Episode: 4024
Episode: 4025
SOLVED! Steps: 3 {16}
Episode: 4026
Episode: 4027
Episode: 4028
Episode: 4029
SOLVED! Steps: 1 {17}
Episode: 4030
SOLVED! Steps: 3 {18}
Episode: 4031
SOLVED! Steps: 55 {19}
Episode: 4032
Episode: 4033
Episode: 4034
Episode: 4035
SOLVED! Steps: 2 {20}
Episode: 4036
SOLVED! Steps: 3 {21}
Episode: 4037
SOLVED! Steps: 1 {22}
Episode: 4038
Episode: 4039
SOLVED! Steps: 1 {23}
Episode: 4040
SOLVED! Steps: 3 {24}
Episode: 4041
Episode: 4042
Episode: 4043
SOLVED! Steps: 2 {25}
Episode: 

SOLVED! Steps: 3 {1}
Episode: 4301
SOLVED! Steps: 3 {2}
Episode: 4302
Episode: 4303
SOLVED! Steps: 1 {3}
Episode: 4304
SOLVED! Steps: 1 {4}
Episode: 4305
Episode: 4306
Episode: 4307
SOLVED! Steps: 3 {5}
Episode: 4308
SOLVED! Steps: 2 {6}
Episode: 4309
Episode: 4310
SOLVED! Steps: 3 {7}
Episode: 4311
SOLVED! Steps: 2 {8}
Episode: 4312
SOLVED! Steps: 2 {9}
Episode: 4313
Episode: 4314
Episode: 4315
SOLVED! Steps: 1 {10}
Episode: 4316
SOLVED! Steps: 1 {11}
Episode: 4317
SOLVED! Steps: 1 {12}
Episode: 4318
Episode: 4319
Episode: 4320
SOLVED! Steps: 1 {13}
Episode: 4321
SOLVED! Steps: 3 {14}
Episode: 4322
SOLVED! Steps: 1 {15}
Episode: 4323
SOLVED! Steps: 18 {16}
Episode: 4324
Episode: 4325
SOLVED! Steps: 3 {17}
Episode: 4326
Episode: 4327
Episode: 4328
SOLVED! Steps: 8 {18}
Episode: 4329
SOLVED! Steps: 1 {19}
Episode: 4330
SOLVED! Steps: 3 {20}
Episode: 4331
Episode: 4332
Episode: 4333
Episode: 4334
Episode: 4335
SOLVED! Steps: 2 {21}
Episode: 4336
SOLVED! Steps: 3 {22}
Episode: 4337
SOLVED

SOLVED! Steps: 3 {56}
Episode: 4595
Episode: 4596
SOLVED! Steps: 3 {57}
Episode: 4597
SOLVED! Steps: 3 {58}
Episode: 4598
SOLVED! Steps: 1 {59}
Episode: 4599
SOLVED! Steps: 3 {60}
**************************************************
[56, 59, 58, 59, 57, 58, 59, 59, 59, 60]
**************************************************
Episode: 4600
SOLVED! Steps: 3 {1}
Episode: 4601
SOLVED! Steps: 3 {2}
Episode: 4602
Episode: 4603
SOLVED! Steps: 7 {3}
Episode: 4604
SOLVED! Steps: 1 {4}
Episode: 4605
Episode: 4606
Episode: 4607
SOLVED! Steps: 3 {5}
Episode: 4608
SOLVED! Steps: 2 {6}
Episode: 4609
Episode: 4610
SOLVED! Steps: 3 {7}
Episode: 4611
SOLVED! Steps: 2 {8}
Episode: 4612
SOLVED! Steps: 2 {9}
Episode: 4613
Episode: 4614
SOLVED! Steps: 93 {10}
Episode: 4615
SOLVED! Steps: 1 {11}
Episode: 4616
SOLVED! Steps: 1 {12}
Episode: 4617
SOLVED! Steps: 1 {13}
Episode: 4618
Episode: 4619
Episode: 4620
SOLVED! Steps: 1 {14}
Episode: 4621
SOLVED! Steps: 3 {15}
Episode: 4622
SOLVED! Steps: 1 {16}
Episode: 46

Episode: 4882
Episode: 4883
SOLVED! Steps: 1 {47}
Episode: 4884
SOLVED! Steps: 2 {48}
Episode: 4885
Episode: 4886
Episode: 4887
SOLVED! Steps: 1 {49}
Episode: 4888
SOLVED! Steps: 3 {50}
Episode: 4889
SOLVED! Steps: 3 {51}
Episode: 4890
SOLVED! Steps: 1 {52}
Episode: 4891
SOLVED! Steps: 3 {53}
Episode: 4892
SOLVED! Steps: 3 {54}
Episode: 4893
SOLVED! Steps: 3 {55}
Episode: 4894
SOLVED! Steps: 3 {56}
Episode: 4895
Episode: 4896
SOLVED! Steps: 3 {57}
Episode: 4897
SOLVED! Steps: 3 {58}
Episode: 4898
SOLVED! Steps: 1 {59}
Episode: 4899
SOLVED! Steps: 3 {60}
**************************************************
[59, 57, 58, 59, 59, 59, 60, 59, 62, 60]
**************************************************
Episode: 4900
SOLVED! Steps: 3 {1}
Episode: 4901
SOLVED! Steps: 3 {2}
Episode: 4902
Episode: 4903
SOLVED! Steps: 1 {3}
Episode: 4904
SOLVED! Steps: 1 {4}
Episode: 4905
Episode: 4906
Episode: 4907
SOLVED! Steps: 3 {5}
Episode: 4908
SOLVED! Steps: 2 {6}
Episode: 4909
SOLVED! Steps: 76 {7}
Episode: 

KeyboardInterrupt: 

## Test Generalization

#### Learned Policy

In [21]:
# Evaluate with epsilon = 0: agent.act takes the network's argmax action
# unless np.random.rand() returns exactly 0.0.
agent.epsilon = 0.0
num_solved = 0

for t in range(100):
    # Seeds 100..199 lie outside the 0..99 range that init_sok uses for training.
    random.seed(t+100)
    sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    sok.set_maxsteps(100)

    state = sok.get_image('rgb_array')
    steps = 0
    done = False
    while not done:
        steps += 1
        action = agent.act(process_frame(state))
        # Agent actions 0-3 map to env codes 1-4, actions 4-7 to env codes 9-12.
        action = action + 1 if action < 4 else action + 5
        state, reward, done, info = sok.step(action)

    # A cell with code 3 is taken as the "solved" marker.
    solved = 3 in sok.room_state
    if solved:
        num_solved += 1

    print("Puzzle # %d: %d (%s) [%d/%d]" % ((t+1), steps, "YES" if solved else "NOT", num_solved, (t+1)))

Puzzle # 1: 100 (NOT) [0/1]
Puzzle # 2: 2 (YES) [1/2]
Puzzle # 3: 2 (YES) [2/3]
Puzzle # 4: 100 (NOT) [2/4]
Puzzle # 5: 100 (NOT) [2/5]
Puzzle # 6: 3 (YES) [3/6]
Puzzle # 7: 3 (YES) [4/7]
Puzzle # 8: 2 (YES) [5/8]
Puzzle # 9: 100 (NOT) [5/9]
Puzzle # 10: 100 (NOT) [5/10]
Puzzle # 11: 2 (YES) [6/11]
Puzzle # 12: 100 (NOT) [6/12]
Puzzle # 13: 100 (NOT) [6/13]
Puzzle # 14: 3 (YES) [7/14]
Puzzle # 15: 100 (NOT) [7/15]
Puzzle # 16: 100 (NOT) [7/16]
Puzzle # 17: 100 (NOT) [7/17]
Puzzle # 18: 100 (NOT) [7/18]
Puzzle # 19: 2 (YES) [8/19]
Puzzle # 20: 100 (NOT) [8/20]
Puzzle # 21: 100 (NOT) [8/21]
Puzzle # 22: 100 (NOT) [8/22]
Puzzle # 23: 100 (NOT) [8/23]
Puzzle # 24: 100 (NOT) [8/24]
Puzzle # 25: 100 (NOT) [8/25]
Puzzle # 26: 100 (NOT) [8/26]
Puzzle # 27: 100 (NOT) [8/27]
Puzzle # 28: 100 (NOT) [8/28]
Puzzle # 29: 100 (NOT) [8/29]
Puzzle # 30: 1 (YES) [9/30]
Puzzle # 31: 100 (NOT) [9/31]
Puzzle # 32: 100 (NOT) [9/32]
Puzzle # 33: 3 (YES) [10/33]
Puzzle # 34: 100 (NOT) [10/34]
Puzzle # 35: 100

#### Random Policy

In [22]:
# Baseline: with epsilon = 1 agent.act always returns a uniformly random action.
agent.epsilon = 1.0
num_solved = 0

for t in range(100):
    # Same seeds (100..199) as the learned-policy evaluation.
    random.seed(t+100)
    sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    sok.set_maxsteps(100)

    state = sok.get_image('rgb_array')
    steps = 0
    done = False
    while not done:
        steps += 1
        action = agent.act(process_frame(state))
        # Agent actions 0-3 map to env codes 1-4, actions 4-7 to env codes 9-12.
        action = action + 1 if action < 4 else action + 5
        state, reward, done, info = sok.step(action)

    # A cell with code 3 is taken as the "solved" marker.
    solved = 3 in sok.room_state
    if solved:
        num_solved += 1

    print("Puzzle # %d: %d (%s) [%d/%d]" % ((t+1), steps, "YES" if solved else "NOT", num_solved, (t+1)))

Puzzle # 1: 100 (NOT) [0/1]
Puzzle # 2: 100 (NOT) [0/2]
Puzzle # 3: 100 (NOT) [0/3]
Puzzle # 4: 100 (NOT) [0/4]
Puzzle # 5: 100 (NOT) [0/5]
Puzzle # 6: 100 (NOT) [0/6]
Puzzle # 7: 100 (NOT) [0/7]
Puzzle # 8: 46 (YES) [1/8]
Puzzle # 9: 26 (YES) [2/9]
Puzzle # 10: 100 (NOT) [2/10]
Puzzle # 11: 9 (YES) [3/11]
Puzzle # 12: 100 (NOT) [3/12]
Puzzle # 13: 100 (NOT) [3/13]
Puzzle # 14: 100 (NOT) [3/14]
Puzzle # 15: 1 (YES) [4/15]
Puzzle # 16: 100 (NOT) [4/16]
Puzzle # 17: 100 (NOT) [4/17]
Puzzle # 18: 100 (NOT) [4/18]
Puzzle # 19: 89 (YES) [5/19]
Puzzle # 20: 10 (YES) [6/20]
Puzzle # 21: 100 (NOT) [6/21]
Puzzle # 22: 4 (YES) [7/22]
Puzzle # 23: 100 (NOT) [7/23]
Puzzle # 24: 100 (NOT) [7/24]
Puzzle # 25: 100 (NOT) [7/25]
Puzzle # 26: 100 (NOT) [7/26]
Puzzle # 27: 100 (NOT) [7/27]
Puzzle # 28: 100 (NOT) [7/28]
Puzzle # 29: 100 (NOT) [7/29]
Puzzle # 30: 11 (YES) [8/30]
Puzzle # 31: 100 (NOT) [8/31]
Puzzle # 32: 100 (NOT) [8/32]
Puzzle # 33: 100 (NOT) [8/33]
Puzzle # 34: 100 (NOT) [8/34]
Puzzle # 