In [246]:
class Maze:
    '''
    This is the main class to create maze.
    '''
    def __init__(self,agent,rows=4,cols=4):
        '''
        rows--> No. of rows of the maze
        cols--> No. of columns of the maze
        Need to pass just the two arguments. The rest will be assigned automatically
        maze_map--> Will be set to a Dicationary. Keys will be cells and
                    values will be another dictionary with keys=['E','W','N','S'] for
                    East West North South and values will be 0 or 1. 0 means that 
                    direction(EWNS) is blocked. 1 means that direction is open.
        grid--> A list of all cells
        path--> Shortest path from start(bottom right) to goal(by default top left)
                It will be a dictionary
        _win,_cell_width,_canvas -->    _win and )canvas are for Tkinter window and canvas
                                        _cell_width is cell width calculated automatically
        _agents-->  A list of aganets on the maze
        markedCells-->  Will be used to mark some particular cell during
                        path trace by the agent.
        _
        '''
        self.rows=rows
        self.cols=cols
        self.ix = agent.ix ## pos of agent on rows 
        self.iy = agent.iy ## pos of agent on cols 
        self.eps = agent.eps ## eps determinated in agent class
        
        self.start= None
        self.end=None
        self.reward=0
        self.isFeasable = False ## If there exist a path between start and end point set the false because there is no keypoint at the beggening 
        
        self.maze_map = {}
        ### our matrix representation
        self.grid=[]
        ### path between Start - End point
        self.path=[]
        
        ####
        self._cell_width=50     
        self._win=None 
        self._canvas=None
        self._agents=[]
        self.markCells=[]
        
    def __str__(self):
        """Return a (crude) string representation of the maze."""

        maze_rows = ['-' * self.rows * 2]
        for x in range(self.rows):
            maze_row = ['|']
            for y in range(self.cols):
                if x == 0 and y == 0:
                    maze_row.append('S')
                elif x == 3 and y == 3:
                    maze_row.append('E')
                elif x == 1 and y == 2:
                    maze_row.append('T')
                if not self.maze_map[x,y]['E']:
                    maze_row.append(' |')
                else:
                    maze_row.append('  ')
            maze_rows.append(''.join(maze_row))
            maze_row = ['|']
            for y in range(self.cols):
                if not self.maze_map[x,y]['N']:
                    maze_row.append('-+')
                else:
                    maze_row.append(' +')
            maze_rows.append(''.join(maze_row))
        
        return '\n'.join(maze_rows)
    def write_svg(self, filename):
        """Write an SVG image of the maze to filename."""

        aspect_ratio = self.rows / self.cols
        # Pad the maze all around by this amount.
        padding = 10
        # Height and width of the maze image (excluding padding), in pixels
        height = 500
        width = int(height * aspect_ratio)
        # Scaling factors mapping maze coordinates to image coordinates
        scy, scx = height / self.cols, width / self.rows

        def write_wall(ww_f, ww_x1, ww_y1, ww_x2, ww_y2):
            """Write a single wall to the SVG image file handle f."""

            print('<line x1="{}" y1="{}" x2="{}" y2="{}"/>'
                  .format(ww_x1, ww_y1, ww_x2, ww_y2), file=ww_f)

        # Write the SVG image file for maze
        with open(filename, 'w') as f:
            # SVG preamble and styles.
            print('<?xml version="1.0" encoding="utf-8"?>', file=f)
            print('<svg xmlns="http://www.w3.org/2000/svg"', file=f)
            print('    xmlns:xlink="http://www.w3.org/1999/xlink"', file=f)
            print('    width="{:d}" height="{:d}" viewBox="{} {} {} {}">'
                  .format(width + 2 * padding, height + 2 * padding,
                          -padding, -padding, width + 2 * padding, height + 2 * padding),
                  file=f)
            print('<defs>\n<style type="text/css"><![CDATA[', file=f)
            print('line {', file=f)
            print('    stroke: #000000;\n    stroke-linecap: square;', file=f)
            print('    stroke-width: 5;\n}', file=f)
            print(']]></style>\n</defs>', file=f)
            # Draw the "South" and "East" walls of each cell, if present (these
            # are the "North" and "West" walls of a neighbouring cell in
            # general, of course).
            for x in range(self.rows):
                for y in range(self.cols):
                    if not self.maze_map[y, x]['N']:
                        x1, y1, x2, y2 = x * scx, (y + 1) * scy, (x + 1) * scx, (y + 1) * scy
                        write_wall(f, x1, y1, x2, y2)
                    if not self.maze_map[y, x]['E']:
                        x1, y1, x2, y2 = (x + 1) * scx, y * scy, (x + 1) * scx, (y + 1) * scy
                        write_wall(f, x1, y1, x2, y2)
            # Draw the North and West maze border, which won't have been drawn
            # by the procedure above.
            print('<line x1="0" y1="0" x2="{}" y2="0"/>'.format(width), file=f)
            print('<line x1="0" y1="0" x2="0" y2="{}"/>'.format(height), file=f)
            print('</svg>', file=f)
            
    @property
    def grid(self):
        return self._grid
    
    ### initialize our env with @property decorator
    @grid.setter        
    def grid(self,n):
        self._grid=[]
        for x in range(self.rows):
            for y in range(self.cols):
                self.grid.append((x,y))
                self.maze_map[x,y]={'E':0,'W':0,'N':0,'S':0}
        self.actions = ["openEast","openWest","openNorth","openSouth","goRight","goLeft","goUp",
                "goLeft","addStart","addEnd"]
        
        self.ix = agent.ix ## pos of agent on rows 
        self.iy = agent.iy ## pos of agent on cols 
        self.eps = agent.eps ## eps determinated in agent class
        
        ### set dist between Start and End to None because there is no path at the beggening
        self.dist_SE=None
            
        self.len_actions = len(self.actions)
        ### first initiale state with all the walls closed
        self.state = hash(str(self.maze_map)+str(self.start)+str(self.end)+str((self.ix,self.iy)))
        ### add our first state to our Q_hash
        self.Q_hash = {self.state:[0]*self.len_actions}
        ### add to our visisted_state
        self.visited_state = {self.state:0} 

    
    ### reset the env
    def reset(self):
        for x in range(self.rows):
            for y in range(self.rows):
                self.grid.append((x,y))
                self.maze_map[x,y]={'E':0,'W':0,'N':0,'S':0}
        self.start = None
        self.end = None
        self.ix , self.iy = np.random.randint(4),np.random.randint(4)
        self.reward = 0
        # self.treasure = None 
        self.state = hash(str(self.maze_map)+str(self.start)+str(self.end)+str((self.ix,self.iy)))
        ###Q_hash doesn't reset thus it can be possible that this state was already visited(even if the proba is low at the beggening)
        if not self.state in self.Q_hash.keys():
            self.Q_hash[self.state] = [0]*self.len_actions
        self.visited_state = {self.state:0}
        
    def give_reward(self,state,action_index,prev_isFeasable):
        ## if there is a starting and ending point 
        if all((self.start,self.end)):
            ### give a +1 reward when the agent find a new state 
            if self.state not in self.visited_state.keys():
                self.reward += 10
            ### give a negative or postive rewards depending on the distance between starting and ending point
            if self.dist_SE:
                if self.dist_SE >= 4 and self.dist_SE <=8 :
                    self.reward += 10
                elif self.dist_SE >8 : 
                    self.reward +=20
                elif self.dist_SE < 4 and self.dist_SE >= 2 :
                    self.reward -= 1
                else :
                    self.reward -= 10
            
            ## big penalty if we close the path between starting and ending point 
            if prev_isFeasable == True and self.isFeasable == False :
                self.reward -= 10 
                
            ## big bonus if we open the path between starting and ending point 
            elif prev_isFeasable == False and self.isFeasable == True :
                self.reward += 10
            
        ### give + 1 if the agent add the starting when there is no starting point
        elif not self.start and self.actions[action_index] =="addStart" :
            self.reward += 3
            
        ### give + 1 if the agent add the starting when there is no starting point
        elif not self.end and self.actions[action_index] =="addEnd" :
            self.reward += 3
            
        reward = self.reward    
        self.reward = 0        
        return reward 
            
        
    def take_actions(self,eps):
        ## randomly chose an action with proba eps otherwise take the best action given state : self.state
        if np.random.random() < eps : 
            return np.random.randint(self.len_actions)
        else : 
            return np.argmax(self.Q_hash[self.state])
            
    ### Apply the action at action_index, then recompute self.state as
    ### hash(str(self.maze_map)+str(self.start)+str(self.end)+str((self.ix,self.iy))).
    ### If it is a new state, add it to Q_hash with self.Q_hash[self.state] = [0]*number of possible actions,
    ### and record self.state in the visited_state dictionary.
    def update_states(self,action_index):
        if self.actions[action_index] == "openEast" :
            self._Open_East()
            
        elif self.actions[action_index] == "openWest" :
            self._Open_West()
            
        elif self.actions[action_index] == "openNorth" :
            self._Open_North()
            
        elif self.actions[action_index] == "openSouth" :
            self._Open_South()
            
        elif self.actions[action_index] == "goRight" :
            self._Right()
            
        elif self.actions[action_index] == "goLeft" :
            self._Left()
            
        elif self.actions[action_index] == "goUp" :
            self._Up()
            
        elif self.actions[action_index] == "goDown" :
            self._Down()
            
        elif self.actions[action_index] == "addStart" :
            self._Add_Start()
            
        elif self.actions[action_index] == "addEnd" :
            self._Add_End()
            
        self.state = hash(str(self.maze_map)+str(self.start)+str(self.end)+str((self.ix,self.iy)))
        ### If it's a new state add it on our Q_hash
        if not self.state in self.Q_hash.keys():
            self.Q_hash[self.state] = [0]*self.len_actions
        self.visited_state[self.state] = action_index
        
        ### to check at each step if the maze become feasable and set isFeasable to True 
        self.path = self.BFS(self.start,self.end)
        if self.end in self.path :
            self.isFeasable = True
            self.dist_SE = len(self.path) - 1
            
        ### otherwhise set isFeasable to false
        if not self.end in self.path :
            self.isFeasable = False 
        
        
    ## Move the agent one row down (ix - 1) if the south wall of its cell is open
    def _Down(self):
        if self.maze_map[self.ix,self.iy]['S'] == True :
            self.ix = self.ix-1  
            
            
            
    def _Up(self):
        if self.maze_map[self.ix,self.iy]['N'] == True :
            self.ix = self.ix+1  
            
            
            
    def _Left(self):
        if self.maze_map[self.ix,self.iy]['W'] == True :
            self.iy = self.iy-1  
            
            
            
    def _Right(self):
        if self.maze_map[self.ix,self.iy]['E'] == True :
            self.iy = self.iy+1 
    
    def _Add_End(self):
        ### if there is already a key point do nothing 
        if self.start != (self.ix,self.iy) :
            self.end = (self.ix, self.iy)
    
    def _Add_Start(self):
        if self.end != (self.ix,self.iy) :
            self.start = (self.ix, self.iy)


    ### Open the east wall of the agent's current cell (walls are only opened, never closed)
    def _Open_East(self):
        '''
        To change the East Wall of the cell
        '''
        ### Open if it's close 
        # if self.maze_map[self.ix,self.iy]['E']==0:
        if self.iy+1<self.cols:
            self.maze_map[self.ix,self.iy]['E']=1
            self.maze_map[self.ix,self.iy+1]['W']=1
        ### Close if it's open     
        # else :
        #     if self.iy+1<self.cols:
        #         self.maze_map[self.ix,self.iy]['E']=0
        #         self.maze_map[self.ix ,self.iy+1]['W']=0
            
    def _Open_West(self):
        # if self.maze_map[self.ix,self.iy]['W']==0 :
        if self.iy-1>=0:
            self.maze_map[self.ix,self.iy]['W']=1
            self.maze_map[self.ix,self.iy-1]['E']=1   
        # else :
        #     if self.iy-1>=0:
        #         self.maze_map[self.ix,self.iy]['W']=0
        #         self.maze_map[self.ix,self.iy-1]['E']=0
            
            
            
    def _Open_North(self):
        # if self.maze_map[self.ix,self.iy]['N']==0:
        if self.ix+1<self.rows:
            self.maze_map[self.ix,self.iy]['N']=1
            self.maze_map[self.ix+1,self.iy]['S']=1
        # else :
        #     if self.ix+1<self.rows:
        #         self.maze_map[self.ix,self.iy]['N']=0
        #         self.maze_map[self.ix+1,self.iy]['S']=0
            
            
            
    def _Open_South(self):
        # if self.maze_map[self.ix,self.iy]['S']==0:
        if self.ix-1>=0:
            self.maze_map[self.ix,self.iy]['S']=1
            self.maze_map[self.ix-1,self.iy]['N']=1
        # else : 
        #     if self.ix-1>=0:
        #         self.maze_map[self.ix,self.iy]['S']=0
        #         self.maze_map[self.ix-1,self.iy]['N']=0
               
                    
    ### to find path between start and end point
    def BFS(self,from_,to_):
        ## Do BFS only there is a start and
        dist_SE = 0
        start = from_
        end = to_ 
        path = {}
        if from_ and to_ :
            frontier = [start]
            visited =[start]
            while len(frontier)>0 :
                currCell = frontier.pop(0) #first in first out
                if currCell == end :
                    break
                for d in 'ESNW':
                    if self.maze_map[currCell][d] == True :
                        if d=="E":
                            childCell=(currCell[0],currCell[1]+1)
                        elif d=="S":
                            childCell=(currCell[0]-1,currCell[1])
                        elif d=="N":
                            childCell=(currCell[0]+1,currCell[1])
                        elif d=="W":
                            childCell=(currCell[0],currCell[1]-1) 
                        if childCell in visited:
                            continue
                        frontier.append(childCell)
                        visited.append(childCell)
                        path[childCell]=currCell
        ## keeping only the working path 
        fwdPath = {}
        cell = maze.end
        while cell != maze.start :
            fwdPath[path[cell]] = cell
            cell = path[cell]
        return [maze.end] + list(fwdPath.keys())

           
    def open_try(self):
        for x in range(maze.rows):
            for y in range(maze.cols):
                random = np.random.randint(4)
                self.ix,self.iy = (x,y)
                if random ==0:
                    self._Open_East()
                elif random ==1 :
                    self._Open_West()
                elif random == 2:
                    self._Open_South()
                elif random == 3:
                   self._Open_North() 
                   
    

In [253]:
# Smoke test: build a maze, try to open one random wall per cell, then show
# the result as text and as an SVG file, plus the walls of the cell at
# (maze.ix, maze.iy).
agent = Agent()
maze = Maze(agent)
maze.open_try()
print(maze)
maze.write_svg("test.svg")
print(maze.maze_map[(maze.ix, maze.iy)])

--------
|S |     |
| + +-+ +
| | |T   |
| + +-+ +
|   |   |
|-+-+ + +
| |   |E |
|-+-+-+-+
{'E': 0, 'W': 0, 'N': 0, 'S': 1}


In [255]:
# Place the key points by hand and count the cells on the BFS path between them.
maze.start, maze.end = (0, 1), (3, 1)
len(maze.BFS(maze.start, maze.end))

8

In [27]:

import numpy as np
class Agent():
    """
    Q-learning hyper-parameters plus a random position on the grid.

    name : label of the agent / run
    alpha : learning rate
    gamma : discount factor
    eps : exploration/exploitation greedy score
    rows, cols : size of the grid the position is drawn from; both default
                 to 4, the size that used to be hard-coded
    """
    def __init__(self,name="first_game", alpha=0.2, gamma=0.9, eps=0.1, rows=4, cols=4):
        self.name = name
        self.eps= eps
        self.gamma = gamma
        self.alpha = alpha
        self.rows = rows
        self.cols = cols
        # Draws ix, iy and clears reward; shared with reset_agent so the two
        # can never drift apart.
        self.reset_agent()

    def reset_agent(self):
        """Draw a new random position, clear the reward and return (ix, iy)."""
        self.ix = np.random.randint(self.rows)
        self.iy = np.random.randint(self.cols)
        self.reward = 0
        return(self.ix,self.iy)
    

In [50]:
# Tabular Q-learning over the maze-building environment: 1000 episodes of
# 100 steps each. maze.reset() keeps maze.Q_hash, so the Q table is shared
# by all episodes.
agent = Agent()
maze = Maze(agent)
# NOTE(review): j is never read in this cell.
j=0
for epochs in range(1000):
    for step in range(100):
        ## remember feasibility before acting, then pick an epsilon-greedy action
        isFeasable = maze.isFeasable
        current_action_index = maze.take_actions(agent.eps)
        ## current state and the Q value of the chosen action in it
        current_state = maze.state
        current_q_value = maze.Q_hash[current_state][current_action_index]
        # NOTE(review): give_reward runs BEFORE update_states, so at this
        # point maze.isFeasable still equals the isFeasable captured above
        # and maze.state is the pre-action state -- confirm that the reward
        # is meant to be computed before the transition.
        reward = maze.give_reward(current_state,current_action_index,isFeasable)
        # Prints one line per step (100 000 lines for the full run).
        print(maze.actions[current_action_index])
        ## apply the chosen action; maze.state becomes the new state
        maze.update_states(current_action_index)
        
        ## best action in the new state, we don't want to explore here so eps = 0
        new_action_index = maze.take_actions(0)
        new_state = maze.state 
        new_q_value = maze.Q_hash[new_state][new_action_index]
        ## temporal-difference error: reward + gamma * Q(new state, best action) - Q(state, action)
        temporal_difference = reward + agent.gamma * new_q_value - current_q_value
        
        ## move Q(state, action) towards the target by a step of size alpha
        maze.Q_hash[current_state][current_action_index] = current_q_value + (agent.alpha * temporal_difference)
    # new episode: walls closed, key points removed, agent repositioned
    maze.reset()
    
   


openEast
openEast
openEast
openEast
openSouth
openEast
openEast
openEast
openEast
openEast
openEast
addEnd
goLeft
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openNorth
openEast
openSouth
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
addEnd
openEast
openEast
openEast
openEast
openNorth
openEast
openEast
openEast
openEast
openNorth
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
goLeft
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openNorth
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openEast
openWest
ope

In [80]:
import pandas as pd 
sum(pd.DataFrame(maze.Q_hash).T[9].values>0)

143

In [51]:
print(maze.__str__())
print(maze.ix,maze.iy)
print(maze.start)
print(maze.end)
# print(maze.path_keys)
# j=0
# for i in range(len(maze.Q_hash.keys())):
#     j+=1
#     print(maze.Q_hash[list(maze.Q_hash.keys())[i]])
# j

--------
|S | | | |
|-+-+-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+
3 1
None
None


In [81]:
16*16

256

In [7]:
print(maze.BFS(maze.start,maze.end))

{}


In [61]:
maze.visited_state.keys()

TypeError: keys() takes no arguments (1 given)

In [99]:
# NOTE(review): Agent's first two positional parameters are name and alpha,
# so Agent(0,0) sets name=0 and alpha=0 -- presumably a (0, 0) position was
# intended; confirm. The position itself is forced on the next two lines.
agent = Agent(0,0)

agent.ix = 0
agent.iy = 0
print(agent.ix,agent.iy)


0 0


In [100]:
m = Maze(agent)
print(m.visited_state)

{-4399343940583269523: 0}


In [101]:
m._Open_East()
print(m.__str__())


--------
|S   | | |
|-+-+-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [102]:
m._Add_Start()
print(m.__str__())

--------
|S   | | |
|-+-+-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [103]:
m._Right()
print(m.__str__())

--------
|S   | | |
|-+-+-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [104]:
m._Open_North()
print(m.__str__())

--------
|S   | | |
|-+ +-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [105]:
m._Left()

In [106]:
m._Open_North()
print(m.__str__())

--------
|S   | | |
| + +-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [107]:
m._Open_South()

In [108]:
m._Up()

In [109]:
m._Open_North()
print(m.__str__())

--------
|S   | | |
| + +-+-+
| | |T | |
| +-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [110]:
m._Up()
m.ix
m.iy

0

In [111]:
m._Open_East()
print(m.__str__())

--------
|S   | | |
| + +-+-+
| | |T | |
| +-+-+-+
|   | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [112]:
m.ix
m.iy

0

In [113]:
m._Right()
print(m.ix,m.iy)

2 1


In [114]:
m._Add_End()
print(m.__str__())
m.end

--------
|S   | | |
| + +-+-+
| | |T | |
| +-+-+-+
|   | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


(2, 1)

In [115]:
print(m.ix,m.iy)

2 1


In [116]:
# m._Open_West()
# print(m.__str__())

In [117]:
m.end

(2, 1)

In [118]:
m.BFS(m.start,m.end).keys()

dict_keys([(0, 1), (1, 0), (1, 1), (2, 0), (2, 1)])

In [119]:
m.end in m.BFS(m.start,m.end).keys()

True

In [34]:
agent = Agent()
# NOTE(review): `maze` is called like a class here, while other cells of this
# notebook define the class as `Maze` and bind `maze` to an instance --
# presumably this cell predates that naming; confirm before re-running.
m=maze(agent)
# a._Open_East(0,0)
# a._Open_East(1,1)
# a._Open_North(1,1)
# a._Open_North(0,0) 
# # a._Open_North(1,2)
# # a._Open_North(1,3)
# a.start = (0,0)
# a.end = (1,0)
print(m.__str__())
# a.BFS()

--------
|S | | | |
|-+-+-+-+
| | |T | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | | |E |
|-+-+-+-+


In [516]:
"""There is 17  editable walls here or not here = 2^17 combination * 11 actions *(16 cells, 3 of theme filled, 16p3 =3360)
Q table size in order of 1.e10"""
#### maybe use of deep Q learning

'There is 17  editable walls here or not here = 2^17 combination * 11 actions *(16 cells, 3 of theme filled, 16p3 =3360)\nQ table size in order of 1.e10'

In [513]:
# Sizes of the Q table axes. They were commented out, which made the
# Q_table line below fail with a NameError on a fresh kernel.
n_action = 11
n_cell = 16
n_wall_combi = 4*3+2
n_cell_func = 3

# Nested zero table with the same nesting as before:
# 1 x n_cell_func x n_wall_combi x n_cell x n_action.
# Built with comprehensions because list multiplication repeats references to
# the SAME inner lists, so writing one entry would change many at once.
Q_table = [[[[[0] * n_action for _ in range(n_cell)]
             for _ in range(n_wall_combi)]
            for _ in range(n_cell_func)]]
def take_action(sest, Q, epsilon):
    """Epsilon-greedy choice of an action index for state `sest`.

    sest : key of the current state in Q
    Q : mapping (or sequence) from state to the list of action values
    epsilon : probability of choosing a uniformly random action

    Returns the index of the chosen action. The body now uses its own
    parameters; it previously read the undefined names `eps` and `st` and
    called `random` / `randint` without importing them. The random branch
    draws among all actions of the row instead of a fixed 0..3.
    """
    import random

    action_values = Q[sest]
    # Take a random action with probability epsilon
    if random.random() < epsilon:
        return random.randrange(len(action_values))
    # Or the greedy action (first index holding the largest value)
    return max(range(len(action_values)), key=action_values.__getitem__)

In [2]:
2//3# Q_table[0][0][0][0]

0

In [3]:
# NOTE(review): stale cell. `maze()` is called without an agent, while
# Maze.__init__ in this notebook requires one, and `_Open_North` as defined
# in this notebook takes no cell coordinates (it acts on the agent's current
# cell) -- presumably written against an earlier version of the class.
a=maze()
a._Open_North(0,0)
a.start = (0,0)
a.end = (1,0)
print(a.__str__())
# a.BFS()

NameError: name 'maze' is not defined

In [None]:
print(a.maze_map)

{(0, 0): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (0, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (0, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (0, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (1, 0): {'E': 0, 'W': 0, 'N': 1, 'S': 0}, (1, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (1, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (1, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (2, 0): {'E': 0, 'W': 0, 'N': 0, 'S': 1}, (2, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (2, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (2, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 0): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}}


In [None]:
a._Open_East(2,2)
a.write_svg('test.svg')
print(a.__str__())


--------
| | | | |
|-+-+-+-+
| | | | |
|-+-+-+-+
| | |   |
|-+-+-+-+
| | | | |
|-+-+-+-+


In [36]:
a._Open_North(0,0)
print(a.__str__())
a.write_svg('test.svg')

TypeError: _Open_North() takes 1 positional argument but 3 were given

In [37]:
print(m.maze_map)

{(0, 0): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (0, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (0, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (0, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (1, 0): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (1, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (1, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (1, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (2, 0): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (2, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (2, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (2, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 0): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 1): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 2): {'E': 0, 'W': 0, 'N': 0, 'S': 0}, (3, 3): {'E': 0, 'W': 0, 'N': 0, 'S': 0}}


In [None]:
[[0]*4]*11

[[0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0]]

In [None]:
if any((1,0)):
    print("h")

h


In [None]:
# def is_finished(self):
#         return self.BFS()
    
    # class Agent():
    #     # m = maze()
        
    #     def __init__(self,name="007", alpha=0.2, gamma=0.9, espilon=0.3,Q=Q_):
    #         """ Constructor.
    
    #         Parameters
    #         ----------
    #         name : `str`
    #             Name of computer player.
    #         epsilon : `float`
    #             Greedy rate for exploration-exploitation. 0.3 means 30% of random actions
    #         """
    #         self.name = name 
    #         self.states = []
    #         self.alpha = alpha 
    #         self.epsilon = epsilon 
    #         self.gamma = gamma 
    #         self.Q_ = Q_
    #         # self.maze_ = maze_
    #         # 11  possible actions by cell 
        
    #     def take_action(self,st, Q, epsilon):
    #         # Take an action
    #         if random.uniform(0, 1) < eps:
    #             action = randint(0, 3)
    #         else: # Or greedy action
    #             action = np.argmax(Q[st])
    #         return(action)
    # def BFS(self):
    #     ## Do BFS only there is a start and
    #     if self.start and self.end :
    #         dist_SE = 0
    #         path = {}
    #         start = self.start
    #         end = self.end 
    #         frontier = [start]
    #         visited =[start]
    #         while len(frontier)>0 :
    #             currCell = frontier.pop(0) #first in first out
    #             if currCell == end :
    #                 self.path_len=len(path)
    #                 return True 
    #             for d in 'ESNW':
    #                 print(self.maze_map[currCell])
    #                 print(self.maze_map[currCell][d])
    #                 if self.maze_map[currCell][d] == True :
    #                     if d=="E":
    #                         childCell=(currCell[0],currCell[1]+1)
    #                     elif d=="S":
    #                         childCell=(currCell[0]-1,currCell[1])
    #                     elif d=="N":
    #                         childCell=(currCell[0]+1,currCell[1])
    #                     elif d=="W":
    #                         childCell=(currCell[0],currCell[1]-1) 
    #                     if childCell in visited:
    #                         continue
    #                     print(childCell)
    #                     if childCell == end :
    #                         self.path_len=len(path)
    #                         return True
    #                     frontier.append(childCell)
    #                     visited.append(childCell)
    #                     path[childCell]=currCell
    #         return False 
    #     else :
    #         return False
    


In [None]:
len(None)

TypeError: object of type 'NoneType' has no len()

In [None]:
!pip install gym

Collecting gym
  Downloading gym-0.21.0.tar.gz (1.5 MB)
Collecting cloudpickle>=1.2.0
  Downloading cloudpickle-2.0.0-py3-none-any.whl (25 kB)
Building wheels for collected packages: gym
  Building wheel for gym (setup.py): started
  Building wheel for gym (setup.py): finished with status 'done'
  Created wheel for gym: filename=gym-0.21.0-py3-none-any.whl size=1616824 sha256=e1ab056b0f941f87527f955099b1619264d6c4bb96d8e0a90bc4961ddbcef972
  Stored in directory: c:\users\ahmet\appdata\local\pip\cache\wheels\76\ee\9c\36bfe3e079df99acf5ae57f4e3464ff2771b34447d6d2f2148
Successfully built gym
Installing collected packages: cloudpickle, gym
Successfully installed cloudpickle-2.0.0 gym-0.21.0


In [4]:
import numpy as np

In [6]:
arr1,arr2  = np.array([2,3,1,3,2,4,6,7,9,2,19]),  np.array([2,1,4,3,9,6])

In [25]:
 ~np.isin(arr1,arr2)

array([False, False, False, False, False, False, False,  True, False,
       False,  True])

In [40]:
# Reorder arr1 so its values follow the order given by arr2 (each value
# repeated as often as it occurs in arr1), then append the values of arr1
# that do not appear in arr2 at all.
new_arr = []
for el in arr2:
    new_arr.extend([el] * len(arr1[arr1 == el]))
new_arr += list(arr1[~np.isin(arr1, arr2)])
new_arr

[2, 2, 2, 1, 4, 3, 3, 9, 6, 7, 19]

In [38]:
new_arr + list(arr1[~np.isin(arr1,arr2)])

[2, 2, 2, 1, 4, 3, 3, 9, 6, 7, 19]

In [42]:
isinstance([3,4],int)

False