In [1]:
import numpy as np
from random import randint
import random


In [1]:
class Maze:
    '''
    This is the main class to create maze.
    '''
    def __init__(self,agent,rows=4,cols=4):
        '''
        rows--> No. of rows of the maze
        cols--> No. of columns of the maze
        Need to pass just the two arguments. The rest will be assigned automatically
        maze_map--> Will be set to a Dicationary. Keys will be cells and
                    values will be another dictionary with keys=['E','W','N','S'] for
                    East West North South and values will be 0 or 1. 0 means that 
                    direction(EWNS) is blocked. 1 means that direction is open.
        grid--> A list of all cells
        path--> Shortest path from start(bottom right) to goal(by default top left)
                It will be a dictionary
        _win,_cell_width,_canvas -->    _win and )canvas are for Tkinter window and canvas
                                        _cell_width is cell width calculated automatically
        _agents-->  A list of aganets on the maze
        markedCells-->  Will be used to mark some particular cell during
                        path trace by the agent.
        _
        '''
        self.rows=rows
        self.cols=cols
        self.ix = agent.ix ## pos of agent on rows 
        self.iy = agent.iy ## pos of agent on cols 
        self.eps = agent.eps ## eps determinated in agent class

    
        
        self.start= None
        self.end=None
        self.reward=0
        self.isFeasable = False ## If there exist a path between start and end point set the false because there is no keypoint at the beggening 
        
        self.maze_map = {}
        ### our matrix representation
        self.grid=[]
        ### path between Start - End point
        self.path_keys={}
             

        
    def __str__(self):
        """Return a (crude) string representation of the maze."""

        maze_rows = ['-' * self.rows * 2]
        for x in range(self.rows):
            maze_row = ['|']
            for y in range(self.rows):
                if x == 0 and y == 0:
                    maze_row.append('S')
                elif x == 3 and y == 3:
                    maze_row.append('E')
                elif x == 1 and y == 2:
                    maze_row.append('T')
                if not self.maze_map[x,y]['E']:
                    maze_row.append(' |')
                else:
                    maze_row.append('  ')
            maze_rows.append(''.join(maze_row))
            maze_row = ['|']
            for y in range(self.rows):
                if not self.maze_map[x,y]['N']:
                    maze_row.append('-+')
                else:
                    maze_row.append(' +')
            maze_rows.append(''.join(maze_row))
        
        return '\n'.join(maze_rows)
    
    @property
    def grid(self):
        return self._grid
    
    ### initialize our env with @property decorator
    @grid.setter        
    def grid(self,n):
        self._grid=[]
        for x in range(self.rows):
            for y in range(self.cols):
                self.grid.append((x,y))
                self.maze_map[x,y]={'E':0,'W':0,'N':0,'S':0}
        self.actions = ["openEast","openWest","openNorth","openSouth","goRight","goLeft","goUp",
                "goLeft","addStart","addEnd"]
        ### set dist between Start and End to None because there is no path at the beggening
        self.dist_SE=None
            
        self.len_actions = len(self.actions)
        ### first initiale state with all the walls closed
        self.state = hash(str(self.maze_map)+str(self.start)+str(self.end)+(str((self.ix,self.iy))))
        ### add our first state to our Q_hash
        self.Q_hash = {self.state:[0]*self.len_actions}
        ### add to our visisted_state
        self.visited_state = {self.state:0} 

    
    ### reset the env
    def reset(self):
        for x in range(self.rows):
            for y in range(self.rows):
                self.grid.append((x,y))
                self.maze_map[x,y]={'E':0,'W':0,'N':0,'S':0}
        self.start = None
        self.end = None
        self.ix , self.iy = agent.reset_agent()
        self.reward = 0
        # self.treasure = None 
        self.state = hash(str(self.maze_map)+str(self.start)+str(self.end)+(str((self.ix,self.iy))))
        self.visited_state = {self.state:0}
        
    def give_reward(self,state,action_index,prev_isFeasable):
        ## if there is a starting and ending point 
        if all((self.start,self.end)):
            ### give a +1 reward when the agent find a new state 
            if self.state not in self.visited_state.keys():
                self.reward += 10
            ### give a negative or postive rewards depending on the distance between starting and ending point
            if self.dist_SE:
                if self.dist_SE >= 4 and self.dist_SE <=8 :
                    self.reward += 10
                elif self.dist_SE >8 : 
                    self.reward +=20
                elif self.dist_SE < 4 and self.dist_SE >= 2 :
                    self.reward -= 1
                else :
                    self.reward -= 10
            
            ## big penalty if we close the path between starting and ending point 
            if prev_isFeasable == True and self.isFeasable == False :
                self.reward -= 10 
                
            ## big bonus if we open the path between starting and ending point 
            elif prev_isFeasable == False and self.isFeasable == True :
                self.reward += 10
            
        ### give + 1 if the agent add the starting when there is no starting point
        elif not self.start and self.actions[action_index] =="addStart" :
            self.reward += 3
            
        ### give + 1 if the agent add the starting when there is no starting point
        elif not self.end and self.actions[action_index] =="addEnd" :
            self.reward += 3
            
        reward = self.reward    
        self.reward = 0        
        return reward 
            
        
    def take_actions(self,eps):
        ## randomly chose an action with proba eps otherwise take the best action given state : self.state
        if np.random.random() < eps : 
            return np.random.randint(self.len_actions)
        else : 
            return np.argmax(self.Q_hash[self.state])
            
    ### Update state with respect to action_index then get the state from :str(self.maze_map)+str(self.start)+str(self.end), and stock his hash 
    ### self.state hash(str(self.maze_map)+str(self.start)+str(self.end))
    ### if it's a new state we add it on our Q_hash and then we initialize self.Q_hash [self.state] = [0]*number of possible actions 
    ### and we add self.state in our visited_state dictionary 
    def update_states(self,action_index):
        if self.actions[action_index] == "openEast" :
            self._Open_East()
            
        elif self.actions[action_index] == "openWest" :
            self._Open_West()
            
        elif self.actions[action_index] == "openNorth" :
            self._Open_North()
            
        elif self.actions[action_index] == "openSouth" :
            self._Open_South()
            
        elif self.actions[action_index] == "goRight" :
            self._Right()
            
        elif self.actions[action_index] == "goLeft" :
            self._Left()
            
        elif self.actions[action_index] == "goUp" :
            self._Up()
            
        elif self.actions[action_index] == "goDown" :
            self._Down()
            
        elif self.actions[action_index] == "addStart" :
            self._Add_Start()
            
        elif self.actions[action_index] == "addEnd" :
            self._Add_End()
            
        self.state = hash(str(self.maze_map)+str(self.start)+str(self.end)+(str((self.ix,self.iy))))
        ### If it's a new state add it on our Q_hash
        if not self.state in self.Q_hash.keys():
            self.Q_hash[self.state] = [0]*self.len_actions
        self.visited_state[self.state] = action_index
        
        ### to check at each step if the maze become feasable and set isFeasable to True 
        bfs = self.BFS(self.start,self.end) 
        self.path_keys = bfs.keys() if len(bfs)>0 else {}
        if self.end in self.path_keys :
            self.isFeasable = True
            self.dist_SE = len(self.path_keys)
            
        ### otherwhise set isFeasable to false
        if not self.end in self.path_keys :
            self.isFeasable = False 
        
        
    ## agent move to bottom cell if it's not a edge  
    def _Down(self):
        if self.maze_map[self.ix,self.iy]['S'] == True :
            self.ix = self.ix-1  
            
            
            
    def _Up(self):
        if self.maze_map[self.ix,self.iy]['N'] == True :
            self.ix = self.ix+1  
            
            
            
    def _Left(self):
        if self.maze_map[self.ix,self.iy]['W'] == True :
            self.iy = self.iy-1  
            
            
            
    def _Right(self):
        if self.maze_map[self.ix,self.iy]['E'] == True :
            self.iy = self.iy+1 
    
    def _Add_End(self):
        ### if there is already a key point do nothing :
        if self.start != (self.ix,self.iy) :
            self.end = (self.ix, self.iy)
    
    def _Add_Start(self):
        if self.end != (self.ix,self.iy) :
            self.start = (self.ix, self.iy)


    ### Open east wall if it's close, close it if it's open                              
    def _Open_East(self):
        '''
        To change the East Wall of the cell
        '''
        ### Open if it's close 
        if self.maze_map[self.ix,self.iy]['E']==0:
            if self.iy+1<self.cols:
                self.maze_map[self.ix,self.iy]['E']=1
                self.maze_map[self.ix,self.iy+1]['W']=1
        ### Close if it's open     
        else :
            if self.iy+1<self.cols:
                self.maze_map[self.ix,self.iy]['E']=0
                self.maze_map[self.ix,self.iy+1]['W']=0
            
    def _Open_West(self):
        if self.maze_map[self.ix,self.iy]['W']==0 :
            if self.iy-1>=0:
                self.maze_map[self.ix,self.iy]['W']=1
                self.maze_map[self.ix,self.iy-1]['E']=1   
        else :
            if self.iy-1>=0:
                self.maze_map[self.ix,self.iy]['W']=0
                self.maze_map[self.ix,self.iy-1]['E']=0
            
            
            
    def _Open_North(self):
        if self.maze_map[self.ix,self.iy]['N']==0:
            if self.ix+1<self.rows:
                self.maze_map[self.ix,self.iy]['N']=1
                self.maze_map[self.ix+1,self.iy]['S']=1
        else :
            if self.ix+1<self.rows:
                self.maze_map[self.ix,self.iy]['N']=0
                self.maze_map[self.ix+1,self.iy]['S']=0
            
            
            
    def _Open_South(self):
        if self.maze_map[self.ix,self.iy]['S']==0:
            if self.ix-1>=0:
                self.maze_map[self.ix,self.iy]['S']=1
                self.maze_map[self.ix-1,self.iy]['N']=1
        else : 
            if self.ix-1>=0:
                self.maze_map[self.ix,self.iy]['S']=0
                self.maze_map[self.ix-1,self.iy]['N']=0
               
                    
    ### to find path between start and end point
    def BFS(self,from_,to_):
        ## Do BFS only there is a start and
        dist_SE = 0
        start = from_
        end = to_ 
        path = {}
        if from_ and to_ :
            frontier = [start]
            visited =[start]
            while len(frontier)>0 :
                currCell = frontier.pop(0) #first in first out
                for d in 'ESNW':
                    if self.maze_map[currCell][d] == True :
                        if d=="E":
                            childCell=(currCell[0],currCell[1]+1)
                        elif d=="S":
                            childCell=(currCell[0]-1,currCell[1])
                        elif d=="N":
                            childCell=(currCell[0]+1,currCell[1])
                        elif d=="W":
                            childCell=(currCell[0],currCell[1]-1) 
                        if childCell in visited:
                            continue
                        frontier.append(childCell)
                        visited.append(childCell)
                        path[childCell]=currCell
                        if childCell == end :
                            return path
        return path

    

In [2]:

import numpy as np
class Agent():
    """
    alpha : learning rate 
    gamma : discount factor 
    eps : exploration/exploitation greedy score
    """
    def __init__(self,name="first_game", alpha=0.2, gamma=0.9, eps=0.8):
        self.name = name
        self.eps= eps
        self.gamma = gamma
        self.alpha = alpha
        self.ix = np.random.randint(4)
        self.iy = np.random.randint(4)
        self.reward = 0
    
    def reset_agent(self):
        self.ix = np.random.randint(4)
        self.iy = np.random.randint(4)
        self.reward = 0
        return(self.ix,self.iy)
    

In [3]:
agent = Agent()
maze = Maze(agent)
for epochs in range(100):
    for step in range(10000):
        ## choose best action with respect to current Q table 
        isFeasable = maze.isFeasable
        current_action_index = maze.take_actions(agent.eps)
        ## current state 
        current_state = maze.state
        current_q_value = maze.Q_hash[current_state][current_action_index]
        reward = maze.give_reward(current_state,current_action_index,isFeasable)
        ### reset the current reward
        ## update state with respect to  the current best action 
        maze.update_states(current_action_index)
        
        ## new best action with respect to new Q table, we don't want to explore here so eps = 0
        new_action_index = maze.take_actions(0)
        new_state = maze.state 
        new_q_value = maze.Q_hash[new_state][new_action_index]
        ##bellman equation 
        temporal_difference = reward + agent.gamma * new_q_value - current_q_value
        
        maze.Q_hash[current_state][current_action_index] = current_q_value + (agent.alpha * temporal_difference)
    maze.reset()
    
    

KeyError: 4947353282883159499

In [23]:
print(maze.__str__())
print(maze.ix,maze.iy)
print(maze.start)
print(maze.end)
print(maze.path_keys)
j=0
for i in range(len(maze.Q_hash.keys())):
    j+=1
    print(maze.Q_hash[list(maze.Q_hash.keys())[i]])
j

--------
|S | | | |
|-+-+-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+
1 1
None
None
dict_keys([(3, 1)])
[1.4592008733211765, 2.1496259304156857, 0.6606035124958931, 1.3827023016632636, 5.529514653082614, 4.984459343821093, 5.064482592546958, 4.937877157666328, 9.011798117206236, 2.882394944569407]
[0.0, 0.19440000000000007, 0.0, 0, 0, 0, 0, 0, 0.7080000000000001, 0.6000000000000001]
[0.0, 0, 0, 0, 0.0, 0, 0, 0, 0, 0]
[0.0, 0, 0.0, 0, 0, 0, 0, 0, 0, 0]
[0.0, 0, 0, 0, 0, 0, 0.0, 0, 0, 0]
[0.0, 0.0, 0, 0.0, 0.0, 0, 0, 0, 0, 0]
[0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0]
[0.0, 0, 0, 0, 0.0, 0, 0, 0, 0, 0]
[0, 0.0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0, 0, 0]
[0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0.0, 0, 0, 0, 0, 0, 0]
[0.0, 0, 0, 0, 0, 0, 0, 0.0, 0.6000000000000001, 0]
[0.0, 0, 0.0, 0.0, 0, 0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0, 0.0]
[-0.36000000000000004, 0, 0, -0.2, 0, -0.2, -0.2, -0.2, 0, 0]
[0,

KeyboardInterrupt: 

In [24]:
len(maze.Q_hash.keys())

52002

In [27]:

for step in range(10000):
        ## choose best action with respect to current Q table 
        isFeasable = maze.isFeasable
        current_action_index = maze.take_actions(agent.eps)
        ## current state 
        current_state = maze.state
        current_q_value = maze.Q_hash[current_state][current_action_index]
        reward = maze.give_reward(current_state,current_action_index,isFeasable)
        ### reset the current reward
        ## update state with respect to  the current best action 
        maze.update_states(current_action_index)
        
        ## new best action with respect to new Q table, we don't want to explore here so eps = 0
        new_action_index = maze.take_actions(0)
        new_state = maze.state 
        new_q_value = maze.Q_hash[new_state][new_action_index]
        ##bellman equation 
        temporal_difference = reward + agent.gamma * new_q_value - current_q_value
        
        maze.Q_hash[current_state][current_action_index] = current_q_value + (agent.alpha * temporal_difference)
        
print(maze.__str__())
len(maze.Q_hash.keys())
print(maze.start)
print(maze.end)
print(maze.path_keys)

--------
|S | | | |
|-+ +-+-+
|   |T | |
|-+ + +-+
| |   | |
|-+ +-+ +
| | | |E |
|-+-+-+-+
(3, 1)
(3, 0)
dict_keys([(2, 1), (2, 2), (1, 1), (1, 2), (0, 1), (1, 0)])


In [None]:
    
            
    # ### give + 1 if the agent add the starting when there is no starting point
    # elif not self.start and self.actions[action_index] =="addStart" :
    #     self.reward += 1
        
    # ### give + 1 if the agent add the starting when there is no starting point
    # elif not self.end and self.actions[action_index] =="addEnd" :
    #     self.reward += 1
    
            
    # elif self.actions[action_index] == "addStart" :
    #     self._Add_Start()
    #     ### a cell cant be a start and end cell 
    #     if self.start == self.end :
    #         self.end = None
            
    # elif self.actions[action_index] == "addEnd" :
    #     self._Add_End()
    #     ### a cell cant be a start and end cell 
    #     if self.start == self.end :
    #         self.start = None

        
    # def _Add_Treasure():
    #     # if not all(self.trasure):
    #     self.trasure = (self.ix, self.ix)
    #     # self.trasure =(random.randint(0,rows),random.randint(0,cols))
    
    # ### Random Initialization of start and end point 
    # self.start = (np.random.randint(self.rows),np.random.randint(self.cols))
    # self.end = (np.random.randint(self.rows),np.random.randint(self.cols))
    # while self.end == self.start :
    #     self.end = (np.random.randint(self.rows),np.random.randint(self.cols))

In [None]:
# # !pip install tkinter 
# # !pip install enum 
# # !pip install collections 
# import random,datetime,csv,os
# from tkinter import *
# from enum import Enum
# from collections import deque

# class COLOR(Enum):
#     '''
#     This class is created to use the Tkinter colors easily.
#     Each COLOR object has two color values.
#     The first two objects (dark and light) are for theme and the two color
#     values represent the Canvas color and the Maze Line color respectively.
#     The rest of the colors are for Agents.
#     The first value is the color of the Agent and the second is the color of
#     its footprint
#     '''
#     dark=('gray11','white')
#     light=('white','black')
#     black=('black','dim gray')
#     red=('red3','tomato')
#     cyan=('cyan4','cyan4')
#     green=('green4','pale green')
#     blue=('DeepSkyBlue4','DeepSkyBlue2')
#     yellow=('yellow2','yellow2')
