# Final Project - Reinforcement Learning
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time

import numpy as np
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embed an mp4 file in the notebook.

    The whole file is read into memory and inlined into a ``<video>`` tag as a
    base64 ``data:`` URI.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object wrapping a 640x480 video element.
    """
    # The context manager closes the file handle even if reading fails;
    # the previous open(...).read() left the handle open.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    """Breadth-first step distance from the target cell to every reachable cell.

    Args:
        room_state: 2-D array of cell codes. A cell equal to 2 marks the
            target (if several exist, the last one in row-major order is
            used); cells equal to 0 are treated as impassable.

    Returns:
        Float array with the same shape as ``room_state``. Each reachable cell
        holds the number of 4-neighbour steps from the target. The target
        itself and every cell that is never reached hold 0.

    Raises:
        ValueError: If ``room_state`` contains no cell equal to 2.
    """
    room_state = np.asarray(room_state)

    target_cells = np.argwhere(room_state == 2)
    if len(target_cells) == 0:
        raise ValueError("room_state contains no target cell (value 2)")
    # Row-major order, last match wins - same choice the nested scan made.
    target = tuple(int(v) for v in target_cells[-1])

    n_rows, n_cols = room_state.shape
    distances = np.zeros(shape=room_state.shape)
    visited_cells = {target}
    cell_queue = deque([target])

    while cell_queue:
        row, col = cell_queue.popleft()
        distance = distances[row][col]
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            next_row, next_col = row + d_row, col + d_col
            # Stay inside the grid: without this check a passable edge cell
            # wraps around through negative indices or raises IndexError.
            if not (0 <= next_row < n_rows and 0 <= next_col < n_cols):
                continue
            if room_state[next_row][next_col] == 0:
                continue
            if (next_row, next_col) in visited_cells:
                continue
            distances[next_row][next_col] = distance + 1
            visited_cells.add((next_row, next_col))
            cell_queue.append((next_row, next_col))

    return distances

def calc_distances(room_state, distances):
    """Find the mover and the box, and look up the box's entry in ``distances``.

    Args:
        room_state: 2-D array of cell codes; 5 marks the mover, 4 marks the box.
        distances: 2-D array indexed like ``room_state``.

    Returns:
        Tuple ``(mover, box, box_distance)`` where ``mover`` and ``box`` are
        ``(row, col)`` tuples (``mover`` is None when no cell equals 5) and
        ``box_distance`` is ``distances[box_row][box_col]``.

    Raises:
        TypeError: If no cell equals 4, because the box position is None.
    """
    def last_cell_with(code):
        # Row-major scan; the last matching cell wins, None when absent.
        matches = np.argwhere(room_state == code)
        if len(matches) == 0:
            return None
        return tuple(int(v) for v in matches[-1])

    box = last_cell_with(4)
    mover = last_cell_with(5)

    return mover, box, distances[box[0]][box[1]]

def box2target_change_reward(room_state, next_room_state, distances):
    """Shaping reward for one transition, based on box and mover movement.

    Args:
        room_state: Room array before the step.
        next_room_state: Room array after the step.
        distances: Per-cell distance array passed through to ``calc_distances``.

    Returns:
        -1.0 when the two room arrays are equal. Otherwise the sum of
        a box term (+5.0 if the box's distance value decreased, -5.0 if it
        increased, else 0.0) and a mover term (+1.0 if the straight-line
        mover-to-box distance shrank from at least 2, -1.0 if it grew to at
        least 2, else 0.0).
    """
    # Nothing on the board changed: flat penalty.
    if np.array_equal(room_state, next_room_state):
        return -1.0

    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)

    def straight_line(a, b):
        # Euclidean distance between two (row, col) cells.
        return np.sqrt((a[0]-b[0])**2 + (a[1]-b[1])**2)

    # Box term: did the box's distance value go down or up?
    if n_t2b < t2b:
        box_term = 5.0
    elif n_t2b > t2b:
        box_term = -5.0
    else:
        box_term = 0.0

    m2b = straight_line(mover, box)
    n_m2b = straight_line(n_mover, n_box)

    # Mover term: only counts while the mover is at least 2 away from the box.
    if n_m2b < m2b and m2b >= 2:
        mover_term = 1.0
    elif n_m2b > m2b and n_m2b >= 2:
        mover_term = -1.0
    else:
        mover_term = 0.0

    return box_term + mover_term

# Solution

In [8]:
class SOK_Agent:
    """DQN agent with an online and a target network, two replay buffers and
    rotation-based data augmentation.

    In ``replay`` the online model selects the best next action and the target
    model evaluates it (Double-DQN style target). The target model follows the
    online model through a soft (weighted-average) update.
    """

    def __init__(self):
        # Construct DQN models
        self.state_size = (7, 7, 1)   # input shape of the network
        self.action_size = 8          # number of discrete agent actions
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8           # transitions per replay step (before x4 rotation)

        # Replay buffers
        self.replay_buffer = deque(maxlen=10000)
        self.prioritized_replay_buffer = deque(maxlen=500)
        self.prioritized_replay_batch = 20   # read by the training loop, not by this class

        # Hyperparameters
        self.gamma = 0.9              # discount factor
        self.epsilon = 1.0            # current exploration probability
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.99995  # multiplicative decay per update_epsilon() call
        self.replay_rate = 10         # read by the training loop, not by this class
        self.update_beta = 0.999      # weight kept from the old target weights

        self.verbosity = 100

        # Action remapping applied once per 90-degree np.rot90 of the state.
        # NOTE(review): presumably 0-3 and 4-7 are two groups of
        # up/down/left/right actions - confirm against the environment.
        self.action_rotation_map = {
            0: 2,
            1: 3,
            2: 1,
            3: 0,
            4: 6,
            5: 7,
            6: 5,
            7: 4
        }

    def _build_model(self):
        """Build and compile the Q-network (three conv layers, three dense layers)."""
        # NOTE(review): the Conv2D layers have no activation argument - confirm
        # that is intended.
        model = Sequential()
        model.add(Conv2D(32, (3, 3), input_shape=self.state_size))
        model.add(Conv2D(64, (3, 3), padding='same'))
        model.add(Conv2D(64, (3, 3)))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the regular replay buffer."""
        self.replay_buffer.append([state, action, reward, next_state, done])

    def copy_to_prioritized_buffer(self, n):
        """Copy the ``n`` most recent transitions into the prioritized buffer.

        Transitions are copied newest first. At most ``len(replay_buffer)``
        transitions are copied, so a large ``n`` cannot index past the buffer.
        """
        for i in range(min(n, len(self.replay_buffer))):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])

    def act(self, state):
        """Pick an action epsilon-greedily.

        Args:
            state: Input passed directly to ``model.predict``.

        Returns:
            A random action index with probability ``epsilon``, otherwise the
            argmax of the first row of the model's prediction.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def _sample_minibatch(self):
        """Sample up to ``2 * (batch_size // 2)`` transitions from both buffers.

        Half come from each buffer when both are large enough. Whatever the
        prioritized buffer cannot supply is drawn from the regular buffer
        instead, so sampling never requests more items than a buffer holds.
        """
        half = self.batch_size // 2
        n_prioritized = min(half, len(self.prioritized_replay_buffer))
        n_regular = min(2 * half - n_prioritized, len(self.replay_buffer))

        minibatch = random.sample(self.replay_buffer, n_regular)
        minibatch.extend(random.sample(self.prioritized_replay_buffer, n_prioritized))
        return minibatch

    def replay(self):
        """Run one training step on a sampled, rotation-augmented minibatch.

        Every sampled transition is expanded into four copies, each rotated a
        further 90 degrees with its action remapped through
        ``action_rotation_map``. Does nothing when both buffers are empty.
        """
        minibatch = self._sample_minibatch()
        if not minibatch:
            return

        n_samples = len(minibatch) * 4
        states = np.zeros((n_samples, self.state_size[0], self.state_size[1]))
        actions = np.zeros(n_samples, dtype=int)
        rewards = np.zeros(n_samples)
        next_states = np.zeros((n_samples, self.state_size[0], self.state_size[1]))
        statuses = np.zeros(n_samples)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            for rot in range(4):
                ind = i*4+rot
                if rot != 0:
                    # Rotations accumulate: each pass turns the previous copy once more.
                    state = np.rot90(state, axes=(1,2))
                    next_state = np.rot90(next_state, axes=(1,2))
                    action = self.action_rotation_map.get(action)

                states[ind] = state
                actions[ind] = action
                rewards[ind] = reward
                next_states[ind] = next_state
                statuses[ind] = 1 if done else 0

        # Start from the current predictions so only the taken action's value changes.
        targets = self.model.predict(states, verbose=0)
        # Online model chooses the next action, target model scores it.
        max_actions = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_rewards = self.target_model.predict(next_states, verbose=0)

        for ind, (action, reward, next_reward, max_action, done) in enumerate(
                zip(actions, rewards, next_rewards, max_actions, statuses)):
            if not done:
                reward += self.gamma * next_reward[max_action]
            targets[ind][action] = reward

        self.model.fit(states, targets, epochs=1, verbose=0)

    def update_epsilon(self):
        """Multiply epsilon by ``epsilon_decay`` while it is above ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay

    def update_target_model(self):
        """Soft-update the target weights: beta * target + (1 - beta) * online."""
        model_w = self.model.get_weights()
        target_model_w = self.target_model.get_weights()
        updated_target_model_w = [
            self.update_beta*target_w + (1-self.update_beta)*online_w
            for target_w, online_w in zip(target_model_w, model_w)
        ]
        self.target_model.set_weights(updated_target_model_w)

    def load(self, name):
        """Load weights for the online model from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the online model's weights to ``name``."""
        self.model.save_weights(name)

In [9]:
def process_frame(frame):
    """Reduce an image frame to a 7x7 grid scaled by 1/255, with a leading axis.

    Args:
        frame: 3-D array whose first channel holds 112x112 values
            (it is reshaped to 7 x 16 x 7 x 16).

    Returns:
        Array of shape (1, 7, 7): the maximum of the first channel over each
        16x16 tile, divided by 255.
    """
    first_channel = frame[:, :, 0]
    # Split both image axes into 7 tiles of 16 pixels and keep each tile's maximum.
    tiles = first_channel.reshape(7, 16, 7, 16)
    grid = np.max(tiles, axis=(1, 3)) / 255
    return grid[np.newaxis, ...]

## Training

In [10]:
max_episodes = 25000
max_steps = 100

def init_sok(r):
    """Create a 7x7 single-box push-and-pull Sokoban environment.

    Python's ``random`` module is seeded with ``r % 100`` before the
    environment is built, so values of ``r`` that differ by a multiple of 100
    use the same seed.

    Args:
        r: Integer used to derive the seed.

    Returns:
        The new environment with its step limit set to ``max_steps``.
    """
    seed = r % 100
    random.seed(seed)
    env = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    env.set_maxsteps(max_steps)
    return env

In [11]:
# Training loop: one puzzle per episode, with shaped rewards, experience replay
# and soft target updates. Stops early after `continuous_successes_goal`
# consecutive solved episodes.
agent = SOK_Agent()

continuous_successes_goal = 100
continuous_successes = 0

total_steps = 0
# Number of solved puzzles in each completed block of 100 episodes.
solved_puzzles = []
# Steps taken in each finished episode.
steps_per_episode = []

# Solved-puzzle counter for the current block of 100 episodes.
puzzle_run = 0
for e in range(max_episodes):
    if continuous_successes >= continuous_successes_goal:
        print("Agent training finished!")
        break
    
    print("Episode: %d" % (e))
    
    sok = init_sok(e)
    state = process_frame(sok.get_image('rgb_array'))
    # init_sok seeded `random` with e % 100; re-seed with the episode index so
    # the random draws made after this point differ between episodes.
    random.seed(e)
    
    room_state = sok.room_state.copy() 
    # Distances are computed once from the initial room layout and reused for the episode.
    distances = get_distances(room_state)
    
    for step in range(sok.max_steps):
        total_steps += 1
        # Once epsilon has decayed below 0.1, steps from the 21st onwards in an
        # episode temporarily use epsilon 0.9 for this single action choice;
        # the stored epsilon is restored right after.
        current_epsilon = agent.epsilon
        if current_epsilon < 0.1 and step >= 20:
            agent.epsilon = 0.9 
        action = agent.act(state)
        agent.epsilon = current_epsilon
        
        # Agent actions 0-3 map to environment actions 1-4, agent actions 4-7 to 9-12.
        # NOTE(review): presumably the push and pull action groups - confirm against the env.
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1) 
        else:
            next_state, reward, done, _ = sok.step(action+5)         
        
        next_state = process_frame(next_state)        
        # Not copied here; it is compared immediately and copied below.
        next_room_state = sok.room_state
        
        # Add the shaping reward only on non-terminal steps.
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)
        
        agent.remember(state, action, reward, next_state, done)
        
        state = next_state.copy() 
        room_state = next_room_state.copy()
                
        # Learning starts after the first 100 episodes and runs every
        # `agent.replay_rate` total steps.
        if e >= 100:
            if (total_steps+1) % agent.replay_rate == 0:
                agent.replay() 
                agent.update_target_model()
                agent.update_epsilon()
        
        if done:            
            # A cell with value 3 in the final room state is counted as a solved puzzle.
            if 3 in sok.room_state:
                puzzle_run += 1
                continuous_successes += 1
                print("SOLVED! Steps: %d {%d}" % (step+1, puzzle_run))
            else:
                # Unsolved episode: copy its most recent transitions (at most
                # `prioritized_replay_batch`) into the prioritized buffer.
                agent.copy_to_prioritized_buffer(min(agent.prioritized_replay_batch, step+1))
                continuous_successes = 0
                
            # Every 100 episodes: record and print the block's solve count, then reset it.
            if (e+1) % 100 == 0:
                solved_puzzles.append(puzzle_run)
                print("*"*50)
                print("%s" % solved_puzzles[-10:])
                print("*"*50)
                puzzle_run = 0
                
            steps_per_episode.append(step+1)
            #agent.save("saved_models\episode%d.h5" % (e))
            
            break

Episode: 0
Episode: 1
Episode: 2
Episode: 3
SOLVED! Steps: 54 {1}
Episode: 4
SOLVED! Steps: 1 {2}
Episode: 5
SOLVED! Steps: 28 {3}
Episode: 6
Episode: 7
SOLVED! Steps: 25 {4}
Episode: 8
Episode: 9
SOLVED! Steps: 74 {5}
Episode: 10
Episode: 11
SOLVED! Steps: 27 {6}
Episode: 12
Episode: 13
Episode: 14
Episode: 15
SOLVED! Steps: 54 {7}
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
SOLVED! Steps: 12 {8}
Episode: 30
SOLVED! Steps: 59 {9}
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
SOLVED! Steps: 11 {10}
Episode: 38
Episode: 39
SOLVED! Steps: 12 {11}
Episode: 40
Episode: 41
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
SOLVED! Steps: 43 {12}
Episode: 47
Episode: 48
Episode: 49
Episode: 50
Episode: 51
Episode: 52
SOLVED! Steps: 2 {13}
Episode: 53
SOLVED! Steps: 6 {14}
Episode: 54
SOLVED! Steps: 22 {15}
Episode: 55
SOLVE

Episode: 390
SOLVED! Steps: 11 {33}
Episode: 391
Episode: 392
Episode: 393
SOLVED! Steps: 8 {34}
Episode: 394
Episode: 395
Episode: 396
Episode: 397
Episode: 398
SOLVED! Steps: 7 {35}
Episode: 399
**************************************************
[30, 34, 35, 35]
**************************************************
Episode: 400
Episode: 401
SOLVED! Steps: 38 {1}
Episode: 402
Episode: 403
SOLVED! Steps: 30 {2}
Episode: 404
SOLVED! Steps: 58 {3}
Episode: 405
Episode: 406
Episode: 407
Episode: 408
Episode: 409
Episode: 410
Episode: 411
SOLVED! Steps: 4 {4}
Episode: 412
SOLVED! Steps: 75 {5}
Episode: 413
Episode: 414
Episode: 415
SOLVED! Steps: 16 {6}
Episode: 416
SOLVED! Steps: 2 {7}
Episode: 417
SOLVED! Steps: 17 {8}
Episode: 418
Episode: 419
Episode: 420
SOLVED! Steps: 1 {9}
Episode: 421
Episode: 422
Episode: 423
Episode: 424
Episode: 425
Episode: 426
Episode: 427
Episode: 428
SOLVED! Steps: 9 {10}
Episode: 429
SOLVED! Steps: 22 {11}
Episode: 430
Episode: 431
SOLVED! Steps: 86 {12}
Episo

Episode: 758
Episode: 759
Episode: 760
Episode: 761
Episode: 762
Episode: 763
SOLVED! Steps: 62 {23}
Episode: 764
SOLVED! Steps: 60 {24}
Episode: 765
Episode: 766
Episode: 767
Episode: 768
Episode: 769
Episode: 770
SOLVED! Steps: 3 {25}
Episode: 771
SOLVED! Steps: 7 {26}
Episode: 772
Episode: 773
Episode: 774
SOLVED! Steps: 76 {27}
Episode: 775
Episode: 776
SOLVED! Steps: 6 {28}
Episode: 777
Episode: 778
Episode: 779
Episode: 780
SOLVED! Steps: 28 {29}
Episode: 781
Episode: 782
SOLVED! Steps: 90 {30}
Episode: 783
SOLVED! Steps: 6 {31}
Episode: 784
SOLVED! Steps: 59 {32}
Episode: 785
Episode: 786
Episode: 787
SOLVED! Steps: 8 {33}
Episode: 788
Episode: 789
SOLVED! Steps: 49 {34}
Episode: 790
SOLVED! Steps: 16 {35}
Episode: 791
Episode: 792
Episode: 793
SOLVED! Steps: 7 {36}
Episode: 794
SOLVED! Steps: 30 {37}
Episode: 795
Episode: 796
Episode: 797
Episode: 798
SOLVED! Steps: 7 {38}
Episode: 799
**************************************************
[30, 34, 35, 35, 30, 39, 37, 38]
*********

Episode: 1102
Episode: 1103
SOLVED! Steps: 12 {1}
Episode: 1104
SOLVED! Steps: 1 {2}
Episode: 1105
Episode: 1106
SOLVED! Steps: 80 {3}
Episode: 1107
SOLVED! Steps: 41 {4}
Episode: 1108
Episode: 1109
Episode: 1110
Episode: 1111
SOLVED! Steps: 2 {5}
Episode: 1112
Episode: 1113
Episode: 1114
Episode: 1115
Episode: 1116
SOLVED! Steps: 3 {6}
Episode: 1117
SOLVED! Steps: 18 {7}
Episode: 1118
Episode: 1119
Episode: 1120
SOLVED! Steps: 9 {8}
Episode: 1121
SOLVED! Steps: 47 {9}
Episode: 1122
SOLVED! Steps: 32 {10}
Episode: 1123
Episode: 1124
Episode: 1125
Episode: 1126
Episode: 1127
Episode: 1128
Episode: 1129
SOLVED! Steps: 4 {11}
Episode: 1130
SOLVED! Steps: 8 {12}
Episode: 1131
Episode: 1132
Episode: 1133
Episode: 1134
Episode: 1135
SOLVED! Steps: 97 {13}
Episode: 1136
SOLVED! Steps: 5 {14}
Episode: 1137
Episode: 1138
Episode: 1139
Episode: 1140
Episode: 1141
SOLVED! Steps: 28 {15}
Episode: 1142
Episode: 1143
SOLVED! Steps: 2 {16}
Episode: 1144
Episode: 1145
Episode: 1146
SOLVED! Steps: 2 {1

Episode: 1461
Episode: 1462
Episode: 1463
SOLVED! Steps: 77 {18}
Episode: 1464
Episode: 1465
Episode: 1466
Episode: 1467
Episode: 1468
Episode: 1469
Episode: 1470
SOLVED! Steps: 15 {19}
Episode: 1471
SOLVED! Steps: 15 {20}
Episode: 1472
SOLVED! Steps: 29 {21}
Episode: 1473
Episode: 1474
SOLVED! Steps: 17 {22}
Episode: 1475
Episode: 1476
SOLVED! Steps: 16 {23}
Episode: 1477
SOLVED! Steps: 99 {24}
Episode: 1478
Episode: 1479
SOLVED! Steps: 53 {25}
Episode: 1480
SOLVED! Steps: 6 {26}
Episode: 1481
Episode: 1482
Episode: 1483
SOLVED! Steps: 1 {27}
Episode: 1484
Episode: 1485
Episode: 1486
Episode: 1487
SOLVED! Steps: 39 {28}
Episode: 1488
Episode: 1489
SOLVED! Steps: 6 {29}
Episode: 1490
Episode: 1491
Episode: 1492
Episode: 1493
SOLVED! Steps: 24 {30}
Episode: 1494
SOLVED! Steps: 4 {31}
Episode: 1495
Episode: 1496
Episode: 1497
Episode: 1498
SOLVED! Steps: 5 {32}
Episode: 1499
**************************************************
[39, 37, 38, 37, 42, 42, 31, 36, 40, 32]
**********************

SOLVED! Steps: 3 {41}
**************************************************
[37, 42, 42, 31, 36, 40, 32, 41, 40, 41]
**************************************************
Episode: 1800
Episode: 1801
SOLVED! Steps: 6 {1}
Episode: 1802
Episode: 1803
SOLVED! Steps: 1 {2}
Episode: 1804
SOLVED! Steps: 37 {3}
Episode: 1805
Episode: 1806
SOLVED! Steps: 84 {4}
Episode: 1807
SOLVED! Steps: 48 {5}
Episode: 1808
SOLVED! Steps: 2 {6}
Episode: 1809
Episode: 1810
Episode: 1811
SOLVED! Steps: 2 {7}
Episode: 1812
SOLVED! Steps: 49 {8}
Episode: 1813
Episode: 1814
Episode: 1815
SOLVED! Steps: 22 {9}
Episode: 1816
SOLVED! Steps: 23 {10}
Episode: 1817
SOLVED! Steps: 15 {11}
Episode: 1818
Episode: 1819
Episode: 1820
SOLVED! Steps: 22 {12}
Episode: 1821
SOLVED! Steps: 97 {13}
Episode: 1822
SOLVED! Steps: 1 {14}
Episode: 1823
Episode: 1824
Episode: 1825
SOLVED! Steps: 4 {15}
Episode: 1826
Episode: 1827
Episode: 1828
SOLVED! Steps: 16 {16}
Episode: 1829
SOLVED! Steps: 1 {17}
Episode: 1830
Episode: 1831
SOLVED! Step

Episode: 2127
Episode: 2128
Episode: 2129
SOLVED! Steps: 1 {13}
Episode: 2130
SOLVED! Steps: 6 {14}
Episode: 2131
Episode: 2132
Episode: 2133
SOLVED! Steps: 15 {15}
Episode: 2134
Episode: 2135
SOLVED! Steps: 78 {16}
Episode: 2136
Episode: 2137
SOLVED! Steps: 8 {17}
Episode: 2138
Episode: 2139
SOLVED! Steps: 1 {18}
Episode: 2140
SOLVED! Steps: 3 {19}
Episode: 2141
Episode: 2142
Episode: 2143
SOLVED! Steps: 2 {20}
Episode: 2144
Episode: 2145
Episode: 2146
SOLVED! Steps: 2 {21}
Episode: 2147
Episode: 2148
Episode: 2149
Episode: 2150
Episode: 2151
SOLVED! Steps: 1 {22}
Episode: 2152
SOLVED! Steps: 1 {23}
Episode: 2153
SOLVED! Steps: 1 {24}
Episode: 2154
Episode: 2155
SOLVED! Steps: 1 {25}
Episode: 2156
SOLVED! Steps: 1 {26}
Episode: 2157
Episode: 2158
Episode: 2159
Episode: 2160
Episode: 2161
SOLVED! Steps: 17 {27}
Episode: 2162
Episode: 2163
SOLVED! Steps: 7 {28}
Episode: 2164
SOLVED! Steps: 3 {29}
Episode: 2165
Episode: 2166
Episode: 2167
Episode: 2168
Episode: 2169
Episode: 2170
SOLVED!

SOLVED! Steps: 1 {23}
Episode: 2452
Episode: 2453
SOLVED! Steps: 8 {24}
Episode: 2454
Episode: 2455
SOLVED! Steps: 1 {25}
Episode: 2456
SOLVED! Steps: 1 {26}
Episode: 2457
SOLVED! Steps: 9 {27}
Episode: 2458
SOLVED! Steps: 4 {28}
Episode: 2459
Episode: 2460
Episode: 2461
Episode: 2462
Episode: 2463
Episode: 2464
SOLVED! Steps: 3 {29}
Episode: 2465
Episode: 2466
Episode: 2467
Episode: 2468
Episode: 2469
Episode: 2470
SOLVED! Steps: 3 {30}
Episode: 2471
SOLVED! Steps: 29 {31}
Episode: 2472
SOLVED! Steps: 2 {32}
Episode: 2473
Episode: 2474
SOLVED! Steps: 3 {33}
Episode: 2475
SOLVED! Steps: 3 {34}
Episode: 2476
SOLVED! Steps: 34 {35}
Episode: 2477
Episode: 2478
Episode: 2479
Episode: 2480
SOLVED! Steps: 17 {36}
Episode: 2481
Episode: 2482
SOLVED! Steps: 47 {37}
Episode: 2483
SOLVED! Steps: 1 {38}
Episode: 2484
Episode: 2485
Episode: 2486
Episode: 2487
SOLVED! Steps: 1 {39}
Episode: 2488
SOLVED! Steps: 79 {40}
Episode: 2489
Episode: 2490
SOLVED! Steps: 45 {41}
Episode: 2491
Episode: 2492
Ep

SOLVED! Steps: 3 {37}
Episode: 2776
SOLVED! Steps: 28 {38}
Episode: 2777
Episode: 2778
SOLVED! Steps: 25 {39}
Episode: 2779
Episode: 2780
Episode: 2781
Episode: 2782
SOLVED! Steps: 79 {40}
Episode: 2783
SOLVED! Steps: 1 {41}
Episode: 2784
SOLVED! Steps: 2 {42}
Episode: 2785
Episode: 2786
Episode: 2787
SOLVED! Steps: 95 {43}
Episode: 2788
SOLVED! Steps: 32 {44}
Episode: 2789
SOLVED! Steps: 3 {45}
Episode: 2790
SOLVED! Steps: 1 {46}
Episode: 2791
Episode: 2792
Episode: 2793
SOLVED! Steps: 3 {47}
Episode: 2794
SOLVED! Steps: 14 {48}
Episode: 2795
Episode: 2796
SOLVED! Steps: 3 {49}
Episode: 2797
SOLVED! Steps: 3 {50}
Episode: 2798
SOLVED! Steps: 1 {51}
Episode: 2799
**************************************************
[44, 42, 40, 43, 42, 51, 44, 42, 47, 51]
**************************************************
Episode: 2800
Episode: 2801
SOLVED! Steps: 3 {1}
Episode: 2802
Episode: 2803
SOLVED! Steps: 1 {2}
Episode: 2804
SOLVED! Steps: 1 {3}
Episode: 2805
Episode: 2806
Episode: 2807
SOLVED! St

SOLVED! Steps: 3 {38}
Episode: 3089
SOLVED! Steps: 3 {39}
Episode: 3090
SOLVED! Steps: 32 {40}
Episode: 3091
Episode: 3092
Episode: 3093
Episode: 3094
Episode: 3095
Episode: 3096
SOLVED! Steps: 3 {41}
Episode: 3097
Episode: 3098
SOLVED! Steps: 1 {42}
Episode: 3099
**************************************************
[43, 42, 51, 44, 42, 47, 51, 50, 51, 42]
**************************************************
Episode: 3100
Episode: 3101
SOLVED! Steps: 3 {1}
Episode: 3102
Episode: 3103
SOLVED! Steps: 1 {2}
Episode: 3104
SOLVED! Steps: 1 {3}
Episode: 3105
Episode: 3106
SOLVED! Steps: 95 {4}
Episode: 3107
SOLVED! Steps: 3 {5}
Episode: 3108
SOLVED! Steps: 2 {6}
Episode: 3109
Episode: 3110
Episode: 3111
SOLVED! Steps: 2 {7}
Episode: 3112
SOLVED! Steps: 2 {8}
Episode: 3113
Episode: 3114
Episode: 3115
SOLVED! Steps: 1 {9}
Episode: 3116
SOLVED! Steps: 1 {10}
Episode: 3117
SOLVED! Steps: 88 {11}
Episode: 3118
Episode: 3119
Episode: 3120
SOLVED! Steps: 1 {12}
Episode: 3121
SOLVED! Steps: 3 {13}
Episo

SOLVED! Steps: 3 {5}
Episode: 3408
SOLVED! Steps: 2 {6}
Episode: 3409
Episode: 3410
Episode: 3411
SOLVED! Steps: 2 {7}
Episode: 3412
SOLVED! Steps: 2 {8}
Episode: 3413
Episode: 3414
Episode: 3415
SOLVED! Steps: 97 {9}
Episode: 3416
SOLVED! Steps: 1 {10}
Episode: 3417
SOLVED! Steps: 8 {11}
Episode: 3418
Episode: 3419
Episode: 3420
Episode: 3421
SOLVED! Steps: 3 {12}
Episode: 3422
SOLVED! Steps: 42 {13}
Episode: 3423
Episode: 3424
Episode: 3425
SOLVED! Steps: 3 {14}
Episode: 3426
Episode: 3427
Episode: 3428
Episode: 3429
SOLVED! Steps: 49 {15}
Episode: 3430
SOLVED! Steps: 3 {16}
Episode: 3431
Episode: 3432
Episode: 3433
SOLVED! Steps: 46 {17}
Episode: 3434
Episode: 3435
SOLVED! Steps: 2 {18}
Episode: 3436
SOLVED! Steps: 3 {19}
Episode: 3437
Episode: 3438
Episode: 3439
SOLVED! Steps: 29 {20}
Episode: 3440
SOLVED! Steps: 3 {21}
Episode: 3441
Episode: 3442
Episode: 3443
SOLVED! Steps: 2 {22}
Episode: 3444
Episode: 3445
Episode: 3446
SOLVED! Steps: 2 {23}
Episode: 3447
Episode: 3448
Episode:

Episode: 3728
Episode: 3729
SOLVED! Steps: 1 {16}
Episode: 3730
SOLVED! Steps: 3 {17}
Episode: 3731
Episode: 3732
Episode: 3733
Episode: 3734
Episode: 3735
Episode: 3736
SOLVED! Steps: 3 {18}
Episode: 3737
SOLVED! Steps: 1 {19}
Episode: 3738
Episode: 3739
SOLVED! Steps: 1 {20}
Episode: 3740
Episode: 3741
SOLVED! Steps: 18 {21}
Episode: 3742
SOLVED! Steps: 53 {22}
Episode: 3743
SOLVED! Steps: 2 {23}
Episode: 3744
Episode: 3745
Episode: 3746
SOLVED! Steps: 2 {24}
Episode: 3747
Episode: 3748
Episode: 3749
Episode: 3750
Episode: 3751
SOLVED! Steps: 1 {25}
Episode: 3752
SOLVED! Steps: 1 {26}
Episode: 3753
SOLVED! Steps: 1 {27}
Episode: 3754
Episode: 3755
SOLVED! Steps: 8 {28}
Episode: 3756
SOLVED! Steps: 1 {29}
Episode: 3757
Episode: 3758
Episode: 3759
Episode: 3760
Episode: 3761
Episode: 3762
Episode: 3763
Episode: 3764
SOLVED! Steps: 3 {30}
Episode: 3765
Episode: 3766
Episode: 3767
Episode: 3768
Episode: 3769
Episode: 3770
SOLVED! Steps: 3 {31}
Episode: 3771
Episode: 3772
Episode: 3773
SO

SOLVED! Steps: 88 {28}
Episode: 4042
Episode: 4043
SOLVED! Steps: 2 {29}
Episode: 4044
Episode: 4045
Episode: 4046
SOLVED! Steps: 2 {30}
Episode: 4047
Episode: 4048
Episode: 4049
Episode: 4050
Episode: 4051
SOLVED! Steps: 1 {31}
Episode: 4052
SOLVED! Steps: 1 {32}
Episode: 4053
SOLVED! Steps: 74 {33}
Episode: 4054
Episode: 4055
SOLVED! Steps: 1 {34}
Episode: 4056
SOLVED! Steps: 1 {35}
Episode: 4057
SOLVED! Steps: 3 {36}
Episode: 4058
SOLVED! Steps: 3 {37}
Episode: 4059
Episode: 4060
Episode: 4061
SOLVED! Steps: 3 {38}
Episode: 4062
Episode: 4063
Episode: 4064
SOLVED! Steps: 3 {39}
Episode: 4065
Episode: 4066
Episode: 4067
Episode: 4068
Episode: 4069
Episode: 4070
SOLVED! Steps: 3 {40}
Episode: 4071
SOLVED! Steps: 2 {41}
Episode: 4072
SOLVED! Steps: 2 {42}
Episode: 4073
SOLVED! Steps: 2 {43}
Episode: 4074
SOLVED! Steps: 3 {44}
Episode: 4075
SOLVED! Steps: 3 {45}
Episode: 4076
SOLVED! Steps: 1 {46}
Episode: 4077
Episode: 4078
Episode: 4079
SOLVED! Steps: 33 {47}
Episode: 4080
SOLVED! Ste

SOLVED! Steps: 1 {27}
Episode: 4352
SOLVED! Steps: 1 {28}
Episode: 4353
Episode: 4354
Episode: 4355
SOLVED! Steps: 1 {29}
Episode: 4356
SOLVED! Steps: 1 {30}
Episode: 4357
SOLVED! Steps: 3 {31}
Episode: 4358
SOLVED! Steps: 3 {32}
Episode: 4359
Episode: 4360
Episode: 4361
Episode: 4362
Episode: 4363
SOLVED! Steps: 1 {33}
Episode: 4364
SOLVED! Steps: 3 {34}
Episode: 4365
Episode: 4366
Episode: 4367
Episode: 4368
Episode: 4369
Episode: 4370
SOLVED! Steps: 3 {35}
Episode: 4371
SOLVED! Steps: 43 {36}
Episode: 4372
SOLVED! Steps: 2 {37}
Episode: 4373
Episode: 4374
SOLVED! Steps: 3 {38}
Episode: 4375
SOLVED! Steps: 3 {39}
Episode: 4376
SOLVED! Steps: 33 {40}
Episode: 4377
Episode: 4378
Episode: 4379
Episode: 4380
SOLVED! Steps: 2 {41}
Episode: 4381
Episode: 4382
Episode: 4383
SOLVED! Steps: 1 {42}
Episode: 4384
SOLVED! Steps: 2 {43}
Episode: 4385
Episode: 4386
Episode: 4387
SOLVED! Steps: 37 {44}
Episode: 4388
SOLVED! Steps: 3 {45}
Episode: 4389
SOLVED! Steps: 3 {46}
Episode: 4390
SOLVED! Ste

SOLVED! Steps: 2 {30}
Episode: 4672
SOLVED! Steps: 2 {31}
Episode: 4673
SOLVED! Steps: 2 {32}
Episode: 4674
SOLVED! Steps: 3 {33}
Episode: 4675
SOLVED! Steps: 3 {34}
Episode: 4676
SOLVED! Steps: 1 {35}
Episode: 4677
Episode: 4678
SOLVED! Steps: 52 {36}
Episode: 4679
Episode: 4680
SOLVED! Steps: 48 {37}
Episode: 4681
Episode: 4682
Episode: 4683
SOLVED! Steps: 1 {38}
Episode: 4684
SOLVED! Steps: 2 {39}
Episode: 4685
Episode: 4686
Episode: 4687
Episode: 4688
SOLVED! Steps: 3 {40}
Episode: 4689
SOLVED! Steps: 3 {41}
Episode: 4690
Episode: 4691
Episode: 4692
Episode: 4693
SOLVED! Steps: 3 {42}
Episode: 4694
Episode: 4695
Episode: 4696
SOLVED! Steps: 3 {43}
Episode: 4697
SOLVED! Steps: 3 {44}
Episode: 4698
SOLVED! Steps: 1 {45}
Episode: 4699
**************************************************
[47, 46, 48, 57, 48, 52, 50, 45, 53, 45]
**************************************************
Episode: 4700
SOLVED! Steps: 3 {1}
Episode: 4701
SOLVED! Steps: 3 {2}
Episode: 4702
Episode: 4703
SOLVED! Steps

SOLVED! Steps: 2 {37}
Episode: 4973
SOLVED! Steps: 2 {38}
Episode: 4974
SOLVED! Steps: 37 {39}
Episode: 4975
SOLVED! Steps: 3 {40}
Episode: 4976
SOLVED! Steps: 35 {41}
Episode: 4977
Episode: 4978
Episode: 4979
SOLVED! Steps: 70 {42}
Episode: 4980
SOLVED! Steps: 2 {43}
Episode: 4981
Episode: 4982
SOLVED! Steps: 53 {44}
Episode: 4983
SOLVED! Steps: 1 {45}
Episode: 4984
SOLVED! Steps: 2 {46}
Episode: 4985
Episode: 4986
Episode: 4987
Episode: 4988
SOLVED! Steps: 3 {47}
Episode: 4989
SOLVED! Steps: 3 {48}
Episode: 4990
SOLVED! Steps: 1 {49}
Episode: 4991
Episode: 4992
Episode: 4993
SOLVED! Steps: 3 {50}
Episode: 4994
SOLVED! Steps: 3 {51}
Episode: 4995
Episode: 4996
SOLVED! Steps: 3 {52}
Episode: 4997
SOLVED! Steps: 3 {53}
Episode: 4998
SOLVED! Steps: 1 {54}
Episode: 4999
**************************************************
[57, 48, 52, 50, 45, 53, 45, 55, 55, 54]
**************************************************
Episode: 5000
SOLVED! Steps: 3 {1}
Episode: 5001
SOLVED! Steps: 3 {2}
Episode: 

SOLVED! Steps: 2 {36}
Episode: 5272
SOLVED! Steps: 2 {37}
Episode: 5273
SOLVED! Steps: 2 {38}
Episode: 5274
SOLVED! Steps: 3 {39}
Episode: 5275
SOLVED! Steps: 3 {40}
Episode: 5276
SOLVED! Steps: 1 {41}
Episode: 5277
Episode: 5278
Episode: 5279
Episode: 5280
SOLVED! Steps: 2 {42}
Episode: 5281
Episode: 5282
Episode: 5283
SOLVED! Steps: 1 {43}
Episode: 5284
SOLVED! Steps: 2 {44}
Episode: 5285
Episode: 5286
Episode: 5287
SOLVED! Steps: 1 {45}
Episode: 5288
SOLVED! Steps: 3 {46}
Episode: 5289
SOLVED! Steps: 3 {47}
Episode: 5290
SOLVED! Steps: 1 {48}
Episode: 5291
Episode: 5292
Episode: 5293
SOLVED! Steps: 3 {49}
Episode: 5294
SOLVED! Steps: 23 {50}
Episode: 5295
Episode: 5296
SOLVED! Steps: 3 {51}
Episode: 5297
SOLVED! Steps: 3 {52}
Episode: 5298
SOLVED! Steps: 1 {53}
Episode: 5299
**************************************************
[50, 45, 53, 45, 55, 55, 54, 55, 56, 53]
**************************************************
Episode: 5300
SOLVED! Steps: 3 {1}
Episode: 5301
SOLVED! Steps: 3 {2

SOLVED! Steps: 2 {36}
Episode: 5573
SOLVED! Steps: 2 {37}
Episode: 5574
SOLVED! Steps: 3 {38}
Episode: 5575
SOLVED! Steps: 3 {39}
Episode: 5576
SOLVED! Steps: 1 {40}
Episode: 5577
Episode: 5578
SOLVED! Steps: 15 {41}
Episode: 5579
SOLVED! Steps: 52 {42}
Episode: 5580
SOLVED! Steps: 2 {43}
Episode: 5581
Episode: 5582
SOLVED! Steps: 86 {44}
Episode: 5583
SOLVED! Steps: 1 {45}
Episode: 5584
SOLVED! Steps: 2 {46}
Episode: 5585
Episode: 5586
Episode: 5587
SOLVED! Steps: 1 {47}
Episode: 5588
SOLVED! Steps: 3 {48}
Episode: 5589
SOLVED! Steps: 3 {49}
Episode: 5590
SOLVED! Steps: 1 {50}
Episode: 5591
SOLVED! Steps: 3 {51}
Episode: 5592
SOLVED! Steps: 3 {52}
Episode: 5593
SOLVED! Steps: 3 {53}
Episode: 5594
SOLVED! Steps: 3 {54}
Episode: 5595
Episode: 5596
SOLVED! Steps: 3 {55}
Episode: 5597
SOLVED! Steps: 3 {56}
Episode: 5598
SOLVED! Steps: 35 {57}
Episode: 5599
SOLVED! Steps: 3 {58}
**************************************************
[45, 55, 55, 54, 55, 56, 53, 53, 57, 58]
********************

Episode: 5869
Episode: 5870
SOLVED! Steps: 3 {34}
Episode: 5871
SOLVED! Steps: 2 {35}
Episode: 5872
SOLVED! Steps: 2 {36}
Episode: 5873
SOLVED! Steps: 2 {37}
Episode: 5874
SOLVED! Steps: 8 {38}
Episode: 5875
SOLVED! Steps: 3 {39}
Episode: 5876
SOLVED! Steps: 1 {40}
Episode: 5877
Episode: 5878
Episode: 5879
SOLVED! Steps: 78 {41}
Episode: 5880
SOLVED! Steps: 2 {42}
Episode: 5881
Episode: 5882
Episode: 5883
SOLVED! Steps: 1 {43}
Episode: 5884
SOLVED! Steps: 2 {44}
Episode: 5885
Episode: 5886
Episode: 5887
SOLVED! Steps: 1 {45}
Episode: 5888
SOLVED! Steps: 3 {46}
Episode: 5889
SOLVED! Steps: 3 {47}
Episode: 5890
SOLVED! Steps: 1 {48}
Episode: 5891
SOLVED! Steps: 3 {49}
Episode: 5892
SOLVED! Steps: 3 {50}
Episode: 5893
SOLVED! Steps: 3 {51}
Episode: 5894
SOLVED! Steps: 3 {52}
Episode: 5895
Episode: 5896
SOLVED! Steps: 3 {53}
Episode: 5897
SOLVED! Steps: 3 {54}
Episode: 5898
SOLVED! Steps: 35 {55}
Episode: 5899
SOLVED! Steps: 3 {56}
**************************************************
[54, 55

Episode: 6166
Episode: 6167
Episode: 6168
Episode: 6169
Episode: 6170
SOLVED! Steps: 3 {30}
Episode: 6171
SOLVED! Steps: 53 {31}
Episode: 6172
SOLVED! Steps: 2 {32}
Episode: 6173
SOLVED! Steps: 2 {33}
Episode: 6174
SOLVED! Steps: 3 {34}
Episode: 6175
SOLVED! Steps: 3 {35}
Episode: 6176
SOLVED! Steps: 7 {36}
Episode: 6177
Episode: 6178
Episode: 6179
SOLVED! Steps: 98 {37}
Episode: 6180
SOLVED! Steps: 2 {38}
Episode: 6181
Episode: 6182
Episode: 6183
SOLVED! Steps: 1 {39}
Episode: 6184
SOLVED! Steps: 2 {40}
Episode: 6185
Episode: 6186
Episode: 6187
SOLVED! Steps: 1 {41}
Episode: 6188
SOLVED! Steps: 3 {42}
Episode: 6189
SOLVED! Steps: 3 {43}
Episode: 6190
SOLVED! Steps: 1 {44}
Episode: 6191
SOLVED! Steps: 3 {45}
Episode: 6192
SOLVED! Steps: 3 {46}
Episode: 6193
SOLVED! Steps: 3 {47}
Episode: 6194
SOLVED! Steps: 3 {48}
Episode: 6195
Episode: 6196
SOLVED! Steps: 3 {49}
Episode: 6197
SOLVED! Steps: 3 {50}
Episode: 6198
SOLVED! Steps: 1 {51}
Episode: 6199
SOLVED! Steps: 3 {52}
****************

SOLVED! Steps: 2 {32}
Episode: 6472
Episode: 6473
SOLVED! Steps: 2 {33}
Episode: 6474
Episode: 6475
SOLVED! Steps: 3 {34}
Episode: 6476
SOLVED! Steps: 1 {35}
Episode: 6477
Episode: 6478
SOLVED! Steps: 23 {36}
Episode: 6479
Episode: 6480
SOLVED! Steps: 2 {37}
Episode: 6481
Episode: 6482
SOLVED! Steps: 68 {38}
Episode: 6483
SOLVED! Steps: 1 {39}
Episode: 6484
SOLVED! Steps: 2 {40}
Episode: 6485
Episode: 6486
Episode: 6487
SOLVED! Steps: 1 {41}
Episode: 6488
SOLVED! Steps: 3 {42}
Episode: 6489
SOLVED! Steps: 3 {43}
Episode: 6490
SOLVED! Steps: 1 {44}
Episode: 6491
SOLVED! Steps: 3 {45}
Episode: 6492
SOLVED! Steps: 3 {46}
Episode: 6493
SOLVED! Steps: 3 {47}
Episode: 6494
SOLVED! Steps: 3 {48}
Episode: 6495
Episode: 6496
SOLVED! Steps: 3 {49}
Episode: 6497
SOLVED! Steps: 3 {50}
Episode: 6498
SOLVED! Steps: 1 {51}
Episode: 6499
SOLVED! Steps: 3 {52}
**************************************************
[58, 52, 57, 56, 57, 56, 52, 52, 53, 52]
**************************************************
E

SOLVED! Steps: 3 {36}
Episode: 6775
SOLVED! Steps: 3 {37}
Episode: 6776
SOLVED! Steps: 28 {38}
Episode: 6777
Episode: 6778
Episode: 6779
SOLVED! Steps: 23 {39}
Episode: 6780
SOLVED! Steps: 2 {40}
Episode: 6781
Episode: 6782
Episode: 6783
SOLVED! Steps: 1 {41}
Episode: 6784
SOLVED! Steps: 2 {42}
Episode: 6785
Episode: 6786


KeyboardInterrupt: 

## Test Generalization

#### Learned Policy

In [12]:
# Evaluate the agent on 100 newly generated 7x7 single-box puzzles and report
# a running solved count.
# NOTE(review): epsilon = 0.0 presumably makes agent.act() fully greedy -- confirm in the agent class.
agent.epsilon = 0.0
num_solved = 0

for t in range(100):
    # Python's `random` is re-seeded per puzzle with 100..199, so the same
    # puzzle index always starts from the same seed.
    random.seed(t + 100)
    sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    sok.set_maxsteps(100)

    state = sok.get_image('rgb_array')
    steps = 0

    # Run the episode until the environment reports it is done.
    while True:
        steps += 1
        action = agent.act(process_frame(state))

        # Agent outputs below 4 are shifted by 1, every other output by 5.
        # NOTE(review): presumably this maps onto the env's push / pull action ids -- verify against soko_pap.
        action = action + 1 if action < 4 else action + 5

        state, reward, done, info = sok.step(action)
        if done:
            break

    # The puzzle counts as solved when the value 3 appears in the final room state.
    # NOTE(review): 3 presumably encodes "box on target" -- confirm.
    solved = 3 in sok.room_state
    if solved:
        num_solved += 1

    print("Puzzle # %d: %d (%s) [%d/%d]" % ((t+1), steps, "YES" if solved else "NOT", num_solved, (t+1)))

Puzzle # 1: 100 (NOT) [0/1]
Puzzle # 2: 2 (YES) [1/2]
Puzzle # 3: 100 (NOT) [1/3]
Puzzle # 4: 100 (NOT) [1/4]
Puzzle # 5: 100 (NOT) [1/5]
Puzzle # 6: 3 (YES) [2/6]
Puzzle # 7: 3 (YES) [3/7]
Puzzle # 8: 2 (YES) [4/8]
Puzzle # 9: 100 (NOT) [4/9]
Puzzle # 10: 100 (NOT) [4/10]
Puzzle # 11: 100 (NOT) [4/11]
Puzzle # 12: 100 (NOT) [4/12]
Puzzle # 13: 100 (NOT) [4/13]
Puzzle # 14: 3 (YES) [5/14]
Puzzle # 15: 100 (NOT) [5/15]
Puzzle # 16: 100 (NOT) [5/16]
Puzzle # 17: 3 (YES) [6/17]
Puzzle # 18: 100 (NOT) [6/18]
Puzzle # 19: 100 (NOT) [6/19]
Puzzle # 20: 3 (YES) [7/20]
Puzzle # 21: 3 (YES) [8/21]
Puzzle # 22: 3 (YES) [9/22]
Puzzle # 23: 1 (YES) [10/23]
Puzzle # 24: 100 (NOT) [10/24]
Puzzle # 25: 100 (NOT) [10/25]
Puzzle # 26: 100 (NOT) [10/26]
Puzzle # 27: 100 (NOT) [10/27]
Puzzle # 28: 100 (NOT) [10/28]
Puzzle # 29: 100 (NOT) [10/29]
Puzzle # 30: 1 (YES) [11/30]
Puzzle # 31: 100 (NOT) [11/31]
Puzzle # 32: 3 (YES) [12/32]
Puzzle # 33: 3 (YES) [13/33]
Puzzle # 34: 100 (NOT) [13/34]
Puzzle # 35:

#### Random Policy

In [13]:
# Baseline run: same 100 seeded 7x7 single-box puzzles, but with epsilon = 1.0.
# NOTE(review): epsilon = 1.0 presumably makes agent.act() choose uniformly at random -- confirm in the agent class.
agent.epsilon = 1.0
num_solved = 0

for t in range(100):
    # Python's `random` is re-seeded per puzzle with 100..199, so the same
    # puzzle index always starts from the same seed.
    random.seed(t + 100)
    sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    sok.set_maxsteps(100)

    state = sok.get_image('rgb_array')
    steps = 0

    # Run the episode until the environment reports it is done.
    while True:
        steps += 1
        action = agent.act(process_frame(state))

        # Agent outputs below 4 are shifted by 1, every other output by 5.
        # NOTE(review): presumably this maps onto the env's push / pull action ids -- verify against soko_pap.
        action = action + 1 if action < 4 else action + 5

        state, reward, done, info = sok.step(action)
        if done:
            break

    # The puzzle counts as solved when the value 3 appears in the final room state.
    # NOTE(review): 3 presumably encodes "box on target" -- confirm.
    solved = 3 in sok.room_state
    if solved:
        num_solved += 1

    print("Puzzle # %d: %d (%s) [%d/%d]" % ((t+1), steps, "YES" if solved else "NOT", num_solved, (t+1)))

Puzzle # 1: 100 (NOT) [0/1]
Puzzle # 2: 100 (NOT) [0/2]
Puzzle # 3: 100 (NOT) [0/3]
Puzzle # 4: 100 (NOT) [0/4]
Puzzle # 5: 100 (NOT) [0/5]
Puzzle # 6: 100 (NOT) [0/6]
Puzzle # 7: 100 (NOT) [0/7]
Puzzle # 8: 46 (YES) [1/8]
Puzzle # 9: 26 (YES) [2/9]
Puzzle # 10: 100 (NOT) [2/10]
Puzzle # 11: 9 (YES) [3/11]
Puzzle # 12: 100 (NOT) [3/12]
Puzzle # 13: 100 (NOT) [3/13]
Puzzle # 14: 100 (NOT) [3/14]
Puzzle # 15: 1 (YES) [4/15]
Puzzle # 16: 100 (NOT) [4/16]
Puzzle # 17: 100 (NOT) [4/17]
Puzzle # 18: 100 (NOT) [4/18]
Puzzle # 19: 89 (YES) [5/19]
Puzzle # 20: 10 (YES) [6/20]
Puzzle # 21: 100 (NOT) [6/21]
Puzzle # 22: 4 (YES) [7/22]
Puzzle # 23: 100 (NOT) [7/23]
Puzzle # 24: 100 (NOT) [7/24]
Puzzle # 25: 100 (NOT) [7/25]
Puzzle # 26: 100 (NOT) [7/26]
Puzzle # 27: 100 (NOT) [7/27]
Puzzle # 28: 100 (NOT) [7/28]
Puzzle # 29: 100 (NOT) [7/29]
Puzzle # 30: 11 (YES) [8/30]
Puzzle # 31: 100 (NOT) [8/31]
Puzzle # 32: 100 (NOT) [8/32]
Puzzle # 33: 100 (NOT) [8/33]
Puzzle # 34: 100 (NOT) [8/34]
Puzzle # 