# Graph Search Agent

The graph search agent requires an environment that defines the following:

1. Start State
2. Goal State
3. Possible Actions

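We have to build a generalised agent that reaches the goal state using only the functions exposed by the environment. Our agent will use BFS/DFS to reach the goal state. (The implementation below keeps the frontier in a cost-ordered queue; because every move costs 1, it expands states in the same order as BFS.)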

In [3]:
import numpy as np

In [7]:
# Scratch cell: build a 3x4 matrix of random values, then view it as one row.
x = np.random.rand(3, 4)
x.reshape(1, x.size)

array([[0.13835066, 0.8978911 , 0.33437627, 0.79286952, 0.54526932,
        0.48194842, 0.94222112, 0.04775815, 0.51047733, 0.18858194,
        0.92827307, 0.27955491]])

In [212]:
class Node:
    """Search-tree node: a board state plus its parent link and path cost.

    Two nodes are considered equal when their states have the same shape and
    the same entries; ``parent`` and ``cost`` take no part in equality or
    hashing, so the same board reached by two different paths compares equal.
    """

    def __init__(self, parent, state, cost):
        """Store the node that generated this one, the board, and the path cost.

        parent -- the Node this one was expanded from (None for the root).
        state  -- array-like board; it is read through ``np.asarray``.
        cost   -- cost accumulated from the root to this node.
        """
        self.parent = parent
        self.state = state
        self.cost = cost

    def _key(self):
        """Return a hashable snapshot of the state: (shape, flat entries).

        Keeping the entries as separate tuple items (instead of joining them
        into one string) avoids merging cell boundaries, and also works for
        boards whose entries are not strings.
        """
        board = np.asarray(self.state)
        return board.shape, tuple(board.ravel().tolist())

    def __hash__(self):
        return hash(self._key())

    def __str__(self):
        return str(self.state)

    def __eq__(self, other):
        # Compare the states themselves, not their hashes: two different
        # boards may share a hash value.
        if not isinstance(other, Node):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._key() != other._key()
        
    
class PriorityQueue():
    """Minimal priority queue of nodes, ordered by their ``cost`` attribute.

    Items are kept in insertion order in ``self.queue``; ``pop`` scans the
    list for the cheapest item, so ties are resolved in favour of the item
    that was pushed first.
    """

    def __init__(self):
        self.queue = []

    def push(self, node):
        """Add ``node`` (any object with ``cost`` and ``state``) to the queue."""
        self.queue.append(node)

    def pop(self):
        """Remove and return the item with the smallest cost.

        Raises IndexError when the queue is empty.
        """
        if not self.queue:
            raise IndexError("pop from empty priority queue")

        # min() returns the first minimal index, which keeps the
        # first-pushed-wins tie-break and needs no sentinel cost, so
        # arbitrarily large (or infinite) costs are handled correctly.
        index = min(range(len(self.queue)), key=lambda i: self.queue[i].cost)
        return self.queue.pop(index)

    def is_empty(self):
        """Return True when no items are queued."""
        return len(self.queue) == 0

    def __str__(self):
        return str([item.state for item in self.queue])

    def __len__(self):
        return len(self.queue)
            

In [213]:
class Environment():
    """Sliding-tile puzzle environment.

    A state is a 2-D NumPy array in which exactly one cell holds the blank
    marker ``'_'``. The board may be any rectangular size; the size is read
    from the state itself.
    """

    BLANK = '_'  # marker of the empty cell that tiles slide into

    def __init__(self, start_state = None, goal_state = None):
        self.actions = [1, 2, 3, 4]  # 1 - Up, 2 - Down, 3 - Right, 4 - Left
        self.start_state = start_state
        self.goal_state = goal_state

    def get_start_state(self):
        """Return the start state given at construction."""
        return self.start_state

    def get_next_states(self, state):
        """Return the boards reachable from ``state`` by one blank move.

        The result is a list of new arrays (``state`` is not modified), in
        the order: blank moved up, down, right, left. Moves that would leave
        the board are omitted. If ``state`` contains no blank there is no
        legal move and an empty list is returned; if it contains several,
        the first one in row-major order is used.
        """
        rows, cols = state.shape

        # Locate the blank by scanning the board in row-major order.
        blank = next(
            (pos for pos in np.ndindex(rows, cols) if state[pos] == self.BLANK),
            None,
        )
        if blank is None:
            return []

        r, c = blank
        # Cells the blank may swap with, in the order Up, Down, Right, Left.
        targets = [(r - 1, c), (r + 1, c), (r, c + 1), (r, c - 1)]

        new_states = []
        for tr, tc in targets:
            if 0 <= tr < rows and 0 <= tc < cols:
                new_state = np.copy(state)
                new_state[r, c], new_state[tr, tc] = new_state[tr, tc], new_state[r, c]
                new_states.append(new_state)

        return new_states

    def reached_goal(self, state):
        """Return True when ``state`` matches the goal state cell for cell."""
        return bool(np.array_equal(state, self.goal_state))

In [256]:
# Puzzle boards, written row by row; '_' marks the blank cell.
start_state = np.array([
    [2, 3, 4],
    [1, 8, 5],
    [7, '_', 6],
])
goal_state = np.array([
    [1, 2, 3],
    [8, '_', 4],
    [7, 6, 5],
])
env = Environment(start_state=start_state, goal_state=goal_state)

In [257]:
# Show the start board followed by the goal board, one per print line group.
print(start_state, goal_state, sep="\n")

[['2' '3' '4']
 ['1' '8' '5']
 ['7' '_' '6']]
[['1' '2' '3']
 ['8' '_' '4']
 ['7' '6' '5']]


In [258]:
# Search bookkeeping: nodes already expanded, and nodes waiting to be expanded.
explored = {}
frontier = PriorityQueue()

In [259]:
# Seed the frontier with a parentless, zero-cost node for the start state.
init_state = env.get_start_state()
init_node = Node(None, init_state, 0)
frontier.push(init_node)

In [260]:
# Graph search: repeatedly expand the cheapest frontier node until the goal
# is popped or the frontier runs dry (goal_node then stays None).
goal_node = None
while not frontier.is_empty():

    curr_node = frontier.pop()

    # The same state can be queued several times; expand it only once.
    key = hash(curr_node)
    if key in explored:
        continue

    explored[key] = curr_node

    if env.reached_goal(curr_node.state):
        goal_node = curr_node
        break

    # Generate successors only for nodes that are actually expanded.
    next_states = env.get_next_states(curr_node.state)

    for state in next_states:
        node = Node(parent=curr_node, state=state, cost=curr_node.cost+1)
        # An already-explored state would be discarded when popped anyway;
        # leaving it out keeps the frontier (and its linear-scan pop) small.
        if hash(node) not in explored:
            frontier.push(node)
        
    
    

In [261]:
# Walk the parent links back from the goal; l ends up ordered goal -> start.
l = []
node = goal_node
while node is not None:
    l.append(node)
    node = node.parent

# Print the boards from start to goal, numbering the steps from 1.
step = 1
for node in reversed(l):
    print("Step: ", step)
    print(node)
    step += 1
    

Step:  1
[['2' '3' '4']
 ['1' '8' '5']
 ['7' '_' '6']]
Step:  2
[['2' '3' '4']
 ['1' '8' '5']
 ['7' '6' '_']]
Step:  3
[['2' '3' '4']
 ['1' '8' '_']
 ['7' '6' '5']]
Step:  4
[['2' '3' '_']
 ['1' '8' '4']
 ['7' '6' '5']]
Step:  5
[['2' '_' '3']
 ['1' '8' '4']
 ['7' '6' '5']]
Step:  6
[['_' '2' '3']
 ['1' '8' '4']
 ['7' '6' '5']]
Step:  7
[['1' '2' '3']
 ['_' '8' '4']
 ['7' '6' '5']]
Step:  8
[['1' '2' '3']
 ['8' '_' '4']
 ['7' '6' '5']]
