# Final Project - Reinforcements Learning 
Hello dear students,<br> this is the template notebook. Please click on the "File" tab and then on "Save a copy into drive".

---
<br>

### Name and ID:
Student 1: Avraham Raviv, 204355390
<br>
Student 2: Yevgeni Berkovitch, 317079234
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# Installs

In [1]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban

!imageio_download_bin ffmpeg

# Imports

In [2]:
import random
import time

import numpy as np
import matplotlib.pyplot as plt

import base64
import imageio
from pyvirtualdisplay import Display
from IPython.display import HTML

import gym
from gym import error, spaces, utils
from soko_pap import *

from collections import deque
from queue import PriorityQueue

from keras.models import Sequential
from keras.layers import Dense

In [3]:
%matplotlib inline

In [4]:
imageio.plugins.ffmpeg.download()

In [5]:
from gym import logger as gymlogger
gymlogger.set_level(40) # error only

# Display utils
The cell below contains the video display configuration. No need to make changes here.

In [6]:
def embed_mp4(filename):
    """Embeds an mp4 file in the notebook."""
    video = open(filename,'rb').read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return HTML(tag)

# Utils

In [7]:
def get_distances(room_state):
    for i in range(room_state.shape[0]):
        for j in range(room_state.shape[1]):
            if room_state[i][j] == 2:
                target = (i, j)

    distances = np.zeros(shape=room_state.shape)
    visited_cells = set()
    cell_queue = deque()

    visited_cells.add(target)
    cell_queue.appendleft(target)

    while len(cell_queue) != 0:
        cell = cell_queue.pop()
        distance = distances[cell[0]][cell[1]]
        for x,y in ((1,0), (-1,-0), (0,1), (0,-1)):
            next_cell_x, next_cell_y = cell[0]+x, cell[1]+y
            if room_state[next_cell_x][next_cell_y] != 0 and not (next_cell_x, next_cell_y) in visited_cells:
                distances[next_cell_x][next_cell_y] = distance + 1
                visited_cells.add((next_cell_x, next_cell_y))
                cell_queue.appendleft((next_cell_x, next_cell_y))
                
    return distances   

def calc_distances(room_state, distances):
    box = None
    mover = None
    for i in range(room_state.shape[0]):
        for j in range(room_state.shape[1]):            
            if room_state[i][j] == 4:
                box = (i,j)
            
            if room_state[i][j] == 5:
                mover = (i,j)
    
    return mover, box, distances[box[0]][box[1]]   

def box2target_change_reward(room_state, next_room_state, distances):
    mover, box, t2b = calc_distances(room_state, distances)
    n_mover, n_box, n_t2b = calc_distances(next_room_state, distances)
    
    change_reward = 0.0
    if n_t2b < t2b:
        change_reward += 1.0
    elif n_t2b > t2b:
        change_reward -= 1.0
        
    m2b = np.sqrt((mover[0]-box[0])**2 + (mover[1]-box[1])**2)
    n_m2b = np.sqrt((n_mover[0]-n_box[0])**2 + (n_mover[1]-n_box[1])**2)
    
    if n_m2b < m2b and m2b >= 2:
        change_reward += 0.25
    elif n_m2b > m2b and n_m2b >= 2:
        change_reward -= 0.25
        
    return change_reward   

# Solution

In [8]:
class SOK_Agent:
    def __init__(self):
        # Construct DQN models
        self.state_size = (25,) 
        self.action_size = 8
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.batch_size = 8
        
        # Replay buffers
        self.replay_buffer = deque(maxlen=1500)
        self.prioritized_replay_buffer = deque(maxlen=500)
        self.prioritized_replay_batch = 50        
        
        # Hyperparameters
        self.gamma = 0.9
        self.epsilon = 1.0   
        self.epsilon_min = 0.3
        self.epsilon_decay = 0.995
        self.epsilon_update_rate = 10
        self.replay_rate = 10
        self.update_beta = 0.99

        self.verbosity = 100 

    def _build_model(self):
        model = Sequential()
        model.add(Dense(512, input_shape=self.state_size, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer="adam")        
        return model

    def remember(self, state, action, reward, next_state, done):
        self.replay_buffer.append([state, action, reward, next_state, done])    
        
    def copy_to_prioritized_buffer(self, n):
        for i in range(n):
            self.prioritized_replay_buffer.append(self.replay_buffer[-1-i])  

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0]) 

    def replay(self):        
        minibatch = random.sample(self.replay_buffer, self.batch_size//2) 
        minibatch.extend(random.sample(self.prioritized_replay_buffer, self.batch_size//2))
        
        states = np.zeros((self.batch_size, self.state_size[0]))
        actions = np.zeros(self.batch_size, dtype=int)
        rewards = np.zeros(self.batch_size)
        next_states = np.zeros((self.batch_size, self.state_size[0]))
        statuses = np.zeros(self.batch_size)
        targets = np.zeros((self.batch_size, self.action_size)) 
        
        for i, (state, action, reward, next_state, done) in enumerate(minibatch): 
            states[i] = state.copy()
            actions[i] = action
            rewards[i] = reward
            next_states[i] = next_state.copy()
            statuses[i] = 1 if done else 0    
        
        targets = self.model.predict(states) 
        max_actions = np.argmax(self.model.predict(next_states), axis=1)
        next_rewards = self.target_model.predict(next_states)
        
        ind = 0
        for action, reward, next_reward, max_action, done in zip(actions, rewards, next_rewards, max_actions, statuses):  
            if not done:
                reward += self.gamma * next_reward[max_action]
            targets[ind][action] = reward
            ind += 1
        
        self.model.fit(states, targets, epochs=10, verbose=0) 
        
    def update_epsilon(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay
        
    def update_target_model(self):
        model_w = self.model.get_weights()
        target_model_w = self.target_model.get_weights()
        updated_target_model_w = []
        for i in range(len(model_w)):
            updated_target_model_w.append(self.update_beta*target_model_w[i] + (1-self.update_beta)*model_w[i])
        self.target_model.set_weights(updated_target_model_w)    
            
    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [9]:
def process_frame(frame):
    f = frame[16:96, 16:96, 0]   
    f = f.reshape(5, 16, 5, 16).max(axis=(1, 3))
    f = f.flatten()
    f = f / 255
    return np.expand_dims(f, axis=0)

## Training

In [10]:
max_episodes = 10000
max_steps = 100

def init_sok(r):
    random.seed(r%10)
    sok = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
    sok.set_maxsteps(max_steps)
    return sok

In [11]:
agent = SOK_Agent()
successes_before_train = 20
successful_episodes = 0
continuous_successes_goal = 100
continuous_successes = 0

steps_per_episode = []

for e in range(max_episodes):
    if continuous_successes >= continuous_successes_goal:
        print("Agent training finished!")
        break
    
    print("Episode: %d" % (e))
    
    sok = init_sok(e)
    state = process_frame(sok.get_image('rgb_array'))
    random.seed(e)
    
    room_state = sok.room_state.copy() 
    distances = get_distances(room_state)
    
    for step in range(sok.max_steps):
        action = agent.act(state)
        if action < 4:
            next_state, reward, done, _ = sok.step(action+1) 
        else:
            next_state, reward, done, _ = sok.step(action+5)         
        
        next_state = process_frame(next_state)        
        next_room_state = sok.room_state
        
        if not done:
            reward += box2target_change_reward(room_state, next_room_state, distances)
        
        agent.remember(state, action, reward, next_state, done)
        
        state = next_state.copy() 
        room_state = next_room_state.copy()
                
        if successful_episodes >= successes_before_train:
            if (step+1) % agent.replay_rate == 0:
                agent.replay() 
                agent.update_target_model()
                agent.update_epsilon()
        
        if done:            
            if 3 in sok.room_state:
                successful_episodes += 1
                continuous_successes += 1
                print("SOLVED! Episode %d Steps: %d Epsilon %.4f" % (e, step+1, agent.epsilon)) 
                agent.copy_to_prioritized_buffer(min(agent.prioritized_replay_batch, step+1))
            else:
                continuous_successes = 0
                
            steps_per_episode.append(step+1)
            #agent.save("exp1_episode%d.h5" % (e))
            
            break

Episode: 0
Episode: 1
Episode: 2
Episode: 3
SOLVED! Episode 3 Steps: 54 Epsilon 1.0000
Episode: 4
SOLVED! Episode 4 Steps: 1 Epsilon 1.0000
Episode: 5
SOLVED! Episode 5 Steps: 28 Epsilon 1.0000
Episode: 6
Episode: 7
SOLVED! Episode 7 Steps: 25 Epsilon 1.0000
Episode: 8
Episode: 9
SOLVED! Episode 9 Steps: 74 Epsilon 1.0000
Episode: 10
Episode: 11
SOLVED! Episode 11 Steps: 53 Epsilon 1.0000
Episode: 12
Episode: 13
Episode: 14
SOLVED! Episode 14 Steps: 2 Epsilon 1.0000
Episode: 15
Episode: 16
Episode: 17
Episode: 18
SOLVED! Episode 18 Steps: 6 Epsilon 1.0000
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
SOLVED! Episode 23 Steps: 2 Epsilon 1.0000
Episode: 24
SOLVED! Episode 24 Steps: 3 Epsilon 1.0000
Episode: 25
Episode: 26
SOLVED! Episode 26 Steps: 44 Epsilon 1.0000
Episode: 27
SOLVED! Episode 27 Steps: 41 Epsilon 1.0000
Episode: 28
SOLVED! Episode 28 Steps: 9 Epsilon 1.0000
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episo

SOLVED! Episode 231 Steps: 3 Epsilon 0.2988
Episode: 232
Episode: 233
SOLVED! Episode 233 Steps: 1 Epsilon 0.2988
Episode: 234
SOLVED! Episode 234 Steps: 1 Epsilon 0.2988
Episode: 235
Episode: 236
SOLVED! Episode 236 Steps: 27 Epsilon 0.2988
Episode: 237
SOLVED! Episode 237 Steps: 3 Epsilon 0.2988
Episode: 238
SOLVED! Episode 238 Steps: 2 Epsilon 0.2988
Episode: 239
Episode: 240
SOLVED! Episode 240 Steps: 3 Epsilon 0.2988
Episode: 241
SOLVED! Episode 241 Steps: 3 Epsilon 0.2988
Episode: 242
Episode: 243
SOLVED! Episode 243 Steps: 1 Epsilon 0.2988
Episode: 244
SOLVED! Episode 244 Steps: 1 Epsilon 0.2988
Episode: 245
SOLVED! Episode 245 Steps: 74 Epsilon 0.2988
Episode: 246
SOLVED! Episode 246 Steps: 17 Epsilon 0.2988
Episode: 247
SOLVED! Episode 247 Steps: 3 Epsilon 0.2988
Episode: 248
SOLVED! Episode 248 Steps: 2 Epsilon 0.2988
Episode: 249
Episode: 250
SOLVED! Episode 250 Steps: 3 Epsilon 0.2988
Episode: 251
SOLVED! Episode 251 Steps: 3 Epsilon 0.2988
Episode: 252
Episode: 253
SOLVED!

Episode: 413
SOLVED! Episode 413 Steps: 1 Epsilon 0.2988
Episode: 414
SOLVED! Episode 414 Steps: 1 Epsilon 0.2988
Episode: 415
SOLVED! Episode 415 Steps: 7 Epsilon 0.2988
Episode: 416
SOLVED! Episode 416 Steps: 44 Epsilon 0.2988
Episode: 417
SOLVED! Episode 417 Steps: 3 Epsilon 0.2988
Episode: 418
SOLVED! Episode 418 Steps: 2 Epsilon 0.2988
Episode: 419
Episode: 420
SOLVED! Episode 420 Steps: 3 Epsilon 0.2988
Episode: 421
SOLVED! Episode 421 Steps: 3 Epsilon 0.2988
Episode: 422
Episode: 423
SOLVED! Episode 423 Steps: 1 Epsilon 0.2988
Episode: 424
SOLVED! Episode 424 Steps: 1 Epsilon 0.2988
Episode: 425
SOLVED! Episode 425 Steps: 7 Epsilon 0.2988
Episode: 426
SOLVED! Episode 426 Steps: 6 Epsilon 0.2988
Episode: 427
SOLVED! Episode 427 Steps: 3 Epsilon 0.2988
Episode: 428
SOLVED! Episode 428 Steps: 2 Epsilon 0.2988
Episode: 429
Episode: 430
SOLVED! Episode 430 Steps: 3 Epsilon 0.2988
Episode: 431
SOLVED! Episode 431 Steps: 3 Epsilon 0.2988
Episode: 432
Episode: 433
SOLVED! Episode 433 St

SOLVED! Episode 586 Steps: 6 Epsilon 0.2988
Episode: 587
SOLVED! Episode 587 Steps: 3 Epsilon 0.2988
Episode: 588
SOLVED! Episode 588 Steps: 2 Epsilon 0.2988
Episode: 589
Episode: 590
SOLVED! Episode 590 Steps: 3 Epsilon 0.2988
Episode: 591
SOLVED! Episode 591 Steps: 3 Epsilon 0.2988
Episode: 592
Episode: 593
SOLVED! Episode 593 Steps: 1 Epsilon 0.2988
Episode: 594
SOLVED! Episode 594 Steps: 1 Epsilon 0.2988
Episode: 595
SOLVED! Episode 595 Steps: 22 Epsilon 0.2988
Episode: 596
SOLVED! Episode 596 Steps: 6 Epsilon 0.2988
Episode: 597
SOLVED! Episode 597 Steps: 3 Epsilon 0.2988
Episode: 598
SOLVED! Episode 598 Steps: 2 Epsilon 0.2988
Episode: 599
Episode: 600
SOLVED! Episode 600 Steps: 3 Epsilon 0.2988
Episode: 601
SOLVED! Episode 601 Steps: 3 Epsilon 0.2988
Episode: 602
Episode: 603
SOLVED! Episode 603 Steps: 1 Epsilon 0.2988
Episode: 604
SOLVED! Episode 604 Steps: 1 Epsilon 0.2988
Episode: 605
SOLVED! Episode 605 Steps: 7 Epsilon 0.2988
Episode: 606
SOLVED! Episode 606 Steps: 6 Epsilo

SOLVED! Episode 756 Steps: 6 Epsilon 0.2988
Episode: 757
SOLVED! Episode 757 Steps: 3 Epsilon 0.2988
Episode: 758
SOLVED! Episode 758 Steps: 2 Epsilon 0.2988
Episode: 759
Episode: 760
SOLVED! Episode 760 Steps: 3 Epsilon 0.2988
Episode: 761
SOLVED! Episode 761 Steps: 3 Epsilon 0.2988
Episode: 762
Episode: 763
SOLVED! Episode 763 Steps: 1 Epsilon 0.2988
Episode: 764
SOLVED! Episode 764 Steps: 1 Epsilon 0.2988
Episode: 765
SOLVED! Episode 765 Steps: 7 Epsilon 0.2988
Episode: 766
SOLVED! Episode 766 Steps: 6 Epsilon 0.2988
Episode: 767
SOLVED! Episode 767 Steps: 3 Epsilon 0.2988
Episode: 768
SOLVED! Episode 768 Steps: 2 Epsilon 0.2988
Episode: 769
Episode: 770
SOLVED! Episode 770 Steps: 3 Epsilon 0.2988
Episode: 771
SOLVED! Episode 771 Steps: 3 Epsilon 0.2988
Episode: 772
Episode: 773
SOLVED! Episode 773 Steps: 1 Epsilon 0.2988
Episode: 774
SOLVED! Episode 774 Steps: 1 Epsilon 0.2988
Episode: 775
SOLVED! Episode 775 Steps: 7 Epsilon 0.2988
Episode: 776
SOLVED! Episode 776 Steps: 6 Epsilon

Episode: 930
SOLVED! Episode 930 Steps: 3 Epsilon 0.2988
Episode: 931
SOLVED! Episode 931 Steps: 3 Epsilon 0.2988
Episode: 932
Episode: 933
SOLVED! Episode 933 Steps: 1 Epsilon 0.2988
Episode: 934
SOLVED! Episode 934 Steps: 1 Epsilon 0.2988
Episode: 935
Episode: 936
Episode: 937
SOLVED! Episode 937 Steps: 3 Epsilon 0.2988
Episode: 938
SOLVED! Episode 938 Steps: 2 Epsilon 0.2988
Episode: 939
Episode: 940
SOLVED! Episode 940 Steps: 3 Epsilon 0.2988
Episode: 941
SOLVED! Episode 941 Steps: 3 Epsilon 0.2988
Episode: 942
Episode: 943
SOLVED! Episode 943 Steps: 1 Epsilon 0.2988
Episode: 944
SOLVED! Episode 944 Steps: 1 Epsilon 0.2988
Episode: 945
SOLVED! Episode 945 Steps: 93 Epsilon 0.2988
Episode: 946
SOLVED! Episode 946 Steps: 16 Epsilon 0.2988
Episode: 947
SOLVED! Episode 947 Steps: 3 Epsilon 0.2988
Episode: 948
SOLVED! Episode 948 Steps: 2 Epsilon 0.2988
Episode: 949
Episode: 950
SOLVED! Episode 950 Steps: 3 Epsilon 0.2988
Episode: 951
SOLVED! Episode 951 Steps: 3 Epsilon 0.2988
Episode:

SOLVED! Episode 1098 Steps: 2 Epsilon 0.2988
Episode: 1099
Episode: 1100
SOLVED! Episode 1100 Steps: 3 Epsilon 0.2988
Episode: 1101
SOLVED! Episode 1101 Steps: 3 Epsilon 0.2988
Episode: 1102
Episode: 1103
SOLVED! Episode 1103 Steps: 1 Epsilon 0.2988
Episode: 1104
SOLVED! Episode 1104 Steps: 1 Epsilon 0.2988
Episode: 1105
SOLVED! Episode 1105 Steps: 7 Epsilon 0.2988
Episode: 1106
SOLVED! Episode 1106 Steps: 6 Epsilon 0.2988
Episode: 1107
SOLVED! Episode 1107 Steps: 3 Epsilon 0.2988
Episode: 1108
SOLVED! Episode 1108 Steps: 2 Epsilon 0.2988
Episode: 1109
Episode: 1110
SOLVED! Episode 1110 Steps: 3 Epsilon 0.2988
Episode: 1111
SOLVED! Episode 1111 Steps: 3 Epsilon 0.2988
Episode: 1112
Episode: 1113
SOLVED! Episode 1113 Steps: 1 Epsilon 0.2988
Episode: 1114
SOLVED! Episode 1114 Steps: 1 Epsilon 0.2988
Episode: 1115
SOLVED! Episode 1115 Steps: 7 Epsilon 0.2988
Episode: 1116
SOLVED! Episode 1116 Steps: 6 Epsilon 0.2988
Episode: 1117
SOLVED! Episode 1117 Steps: 3 Epsilon 0.2988
Episode: 1118


Episode: 1266
Episode: 1267
SOLVED! Episode 1267 Steps: 3 Epsilon 0.2988
Episode: 1268
SOLVED! Episode 1268 Steps: 2 Epsilon 0.2988
Episode: 1269
Episode: 1270
SOLVED! Episode 1270 Steps: 3 Epsilon 0.2988
Episode: 1271
SOLVED! Episode 1271 Steps: 3 Epsilon 0.2988
Episode: 1272
Episode: 1273
SOLVED! Episode 1273 Steps: 1 Epsilon 0.2988
Episode: 1274
SOLVED! Episode 1274 Steps: 1 Epsilon 0.2988
Episode: 1275
SOLVED! Episode 1275 Steps: 18 Epsilon 0.2988
Episode: 1276
SOLVED! Episode 1276 Steps: 6 Epsilon 0.2988
Episode: 1277
SOLVED! Episode 1277 Steps: 3 Epsilon 0.2988
Episode: 1278
SOLVED! Episode 1278 Steps: 2 Epsilon 0.2988
Episode: 1279


KeyboardInterrupt: 