In [82]:
import numpy as np
import skimage
from PIL import Image
from skimage.measure import block_reduce
from skimage.color import colorconv
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook as tqdm
import pandas as pd

In [78]:
def load_maze(path, cell_size):
    maze = skimage.io.imread(path, as_gray=True)
    maze = block_reduce(maze, cell_size, np.max)
    start = tuple(np.argwhere(maze*255==64)[0])
    finish = tuple(np.argwhere(maze*255==128)[0])
    maze[start] = 1.0
    maze[finish] = 1.0
    return maze, start, finish

In [37]:
def coordinate_to_number(coordinate, n_cols):
    return coordinate[0]*n_cols+coordinate[1]

In [119]:
class Agent:
    
    def __init__(self, maze_path, maze_cell_size):
        self.maze, self.start, self.finish = load_maze(maze_path, maze_cell_size)
        self.actionSpace = np.array([
            0, #move up
            1, #move down
            2, #move right
            3  #move left
        ])
        self.q_table = np.zeros((self.maze.size, self.actionSpace.size))
        
    def learn(self):
        
        epsilon = 0.1
        for i in tqdm(range(10)):
            self._reset()
            done = False
            
            while not done:
                actions = self.actionSpace
                
                # Get valid actions that actor can perform from current position
                if self.maze[self.pos] == 1.0:
                    upper_cell = (self.pos[0] - 1, self.pos[1])
                    lower_cell = (self.pos[0] + 1, self.pos[1])
                    right_cell = (self.pos[0], self.pos[1] + 1)
                    left_cell = (self.pos[0], self.pos[1] - 1)
                    
                    if self.maze[upper_cell] == 0.0:
                        actions = np.delete(actions, np.argwhere(actions == self.actionSpace[0]))
                    if self.maze[lower_cell] == 0.0:
                        actions = np.delete(actions, np.argwhere(actions == self.actionSpace[1]))
                    if self.maze[right_cell] == 0.0:
                        actions = np.delete(actions, np.argwhere(actions == self.actionSpace[2]))
                    if self.maze[left_cell] == 0.0:
                        actions = np.delete(actions, np.argwhere(actions == self.actionSpace[3]))
                        
                # Select action to perform
                if np.random.uniform() < epsilon:
                    action = np.random.choice(actions)
                else:
                    state_n = coordinate_to_number(self.pos, self.maze.shape[1])
                    action = actions[np.argmax(np.take(self.q_table[state_n], actions))]
                    
                done = self._step(action)
        
        
    def _reset(self):
        self.pos = self.start
    
    def _step(self, action):
        lr = 0.1
        gamma = 0.7
        
        reward = 10
        
        old_pos = self.pos
        if action == 0:
            self.pos = (self.pos[0]-1, self.pos[1])
        if action == 1:
            self.pos = (self.pos[0]+1, self.pos[1])
        if action == 2:
            self.pos = (self.pos[0], self.pos[1]+1)
        if action == 3:
            self.pos = (self.pos[0], self.pos[1]-1)
        
        state_old = coordinate_to_number(old_pos, self.maze.shape[1])
        state_new = coordinate_to_number(self.pos, self.maze.shape[1])
        
        self.q_table[(state_old, action)] = (1-lr) * self.q_table[(state_old, action)] + \
                                            lr * (reward + gamma * np.max(self.q_table[state_new]))
        
        done = self.pos == self.finish
        
        return done
        
    def qtable_to_pandas(self):
        df = pd.DataFrame({
            'up': self.q_table[:, 0],
            'down': self.q_table[:, 1],
            'right': self.q_table[:, 2],
            'left': self.q_table[:, 3],
        })
        df.index.name = 'state'
        return df
        
        

In [120]:
agent = Agent('maze2.png', (32, 32))

In [121]:
agent.qtable_to_pandas()

Unnamed: 0_level_0,up,down,right,left
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0


In [122]:
agent.learn()

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

KeyboardInterrupt: 

In [123]:
agent.qtable_to_pandas()

Unnamed: 0_level_0,up,down,right,left
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.000000,0.000000
5,0.000000,0.000000,0.000000,0.000000
6,0.000000,0.000000,0.000000,0.000000
7,0.000000,0.000000,0.000000,0.000000
8,0.000000,0.000000,0.000000,0.000000
9,0.000000,33.317754,21.434353,0.000000
