# Graph Search Agent

The graph search agent requires an environment to define the following

1. Start State
2. Goal State
3. Possible Actions

We have to make a generalised agent, which reaches the goal state using only the functions of the environment. Our agent will use BFS/DFS to reach the goal state.

In [1]:
import numpy as np

## Node
The Node class creates the graph node. It has the following values
1. Parent Node
2. State 
3. Cost

It makes use of the following built in functions: 
1. \_\_hash\_\_ : This provides the hash value for every node, which is required for the hashset
2. \_\_eq\_\_ : To check if 2 nodes are equal (Operator overload)
3. \_\_ne\_\_ : To check if 2 nodes are not equal (Operator overload)
4. \_\_str\_\_ : To get string representation of state in node

In [2]:
class Node:
    """A search-graph node wrapping one board state.

    Attributes:
        parent: The node this one was expanded from, or None for the root.
        state: NumPy array holding the board; it must support ``.shape``
            and ``.flatten()``.
        cost: Priority value used by the frontier to order nodes.

    Two nodes are equal when their states have the same shape and the
    same elements; ``parent`` and ``cost`` are ignored so that the same
    board reached along different paths is treated as one node.
    """

    def __init__(self, parent, state, cost):
        self.parent = parent
        self.state = state
        self.cost = cost

    def _key(self):
        """Return a hashable, unambiguous fingerprint of the state."""
        # A tuple of elements (rather than one joined string) keeps
        # ('1', '23') distinct from ('12', '3') and also works for
        # non-string dtypes.
        return (self.state.shape, tuple(self.state.flatten().tolist()))

    def __hash__(self):
        return hash(self._key())

    def __str__(self):
        return str(self.state)

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        # Compare the states themselves, not their hashes, so that a hash
        # collision can never make two different boards look equal.
        return self._key() == other._key()

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

## PriorityQueue
The Priority Queue is used to store the nodes along with the cost, and pop the node having the least cost for BFS

It makes use of the following functions: 
1. push : Add node to queue
2. pop : Pop node having least cost
3. is_empty : To check if queue is empty
4. \_\_len\_\_ : To get length of queue
5. \_\_str\_\_ : To get string representation of queue

In [3]:
class PriorityQueue():
    """Minimum-cost queue of nodes backed by a plain list.

    Items pushed must expose ``cost`` (used for ordering) and ``state``
    (used by ``__str__``). ``self.queue`` keeps the items in insertion
    order; ``pop`` scans it for the cheapest one.
    """

    def __init__(self):
        self.queue = []

    def push(self, node):
        """Append ``node`` to the queue."""
        self.queue.append(node)

    def pop(self):
        """Remove and return the node with the smallest ``cost``.

        Ties are resolved in favour of the node that was pushed first.

        Raises:
            IndexError: If the queue is empty.
        """
        if not self.queue:
            raise IndexError("pop from empty PriorityQueue")

        # min() returns the first minimal index, and it needs no sentinel
        # cost, so arbitrarily large (or infinite) costs are ordered
        # correctly.
        index = min(range(len(self.queue)), key=lambda i: self.queue[i].cost)
        return self.queue.pop(index)

    def is_empty(self):
        """Return True when no nodes are queued."""
        return not self.queue

    def __str__(self):
        return str([node.state for node in self.queue])

    def __len__(self):
        return len(self.queue)
            

## Environment

The environment is what the agent plays in. It has the following entities:
1. actions : The actions defined in the environment
2. start_state : The starting state of the environment
3. goal_state : The goal state of the environment

It has the following functions: 
1. get_start_state : returns the start state
2. reached_goal : returns True if the given state matches the goal state
3. get_next_states : Given current state, it returns all possible next states

In [10]:
class Environment():
    """Sliding-tile puzzle environment.

    A board is a 2-D NumPy array whose blank cell holds ``BLANK``. The
    start state is produced by applying ``depth`` random blank moves to
    the goal state.

    Attributes:
        actions: Action codes (1 - Up, 2 - Down, 3 - Right, 4 - Left).
            The methods of this class do not read it.
        goal_state: The target board.
        depth: Number of random moves used to scramble the goal.
        start_state: The scrambled board generated at construction time.
    """

    BLANK = '_'

    # Blank displacements in the order successors are generated:
    # Up, Down, Right, Left.
    _MOVES = ((-1, 0), (1, 0), (0, 1), (0, -1))

    def __init__(self, depth = None, goal_state = None):
        self.actions = [1,2,3,4] #1 - Up, 2 - Down, 3 - Right, 4 - Left
        self.goal_state = goal_state
        self.depth = depth
        self.start_state = self.generate_start_state()

    def generate_start_state(self):
        """Scramble the goal state with ``self.depth`` random moves.

        A depth of None or any non-positive depth returns the goal state
        unchanged instead of looping forever.
        """
        # Use this instance's goal, not whatever global happens to be
        # named ``goal_state``.
        state = self.goal_state
        steps = self.depth if self.depth is not None else 0

        taken = 0
        while taken < steps:
            candidates = self.get_next_states(state)
            if not candidates:
                # Nothing can move (e.g. a 1x1 board); stop scrambling.
                break
            choice = np.random.randint(low=0, high=len(candidates))

            # Only count moves that actually change the board.
            if np.array_equal(candidates[choice], state):
                continue

            state = candidates[choice]
            taken += 1

        return state

    def get_start_state(self):
        """Return the start board."""
        return self.start_state

    def get_goal_state(self):
        """Return the goal board."""
        return self.goal_state

    def _find_blank(self, state):
        """Return (row, col) of the first blank cell in row-major order.

        Falls back to (0, 0) when the board contains no blank.
        """
        rows, cols = state.shape
        for i in range(rows):
            for j in range(cols):
                if state[i, j] == self.BLANK:
                    return (i, j)
        return (0, 0)

    def get_next_states(self, state):
        """Return every board reachable from ``state`` by one blank move.

        Successors are listed in Up, Down, Right, Left order, skipping
        moves that would leave the board. ``state`` is not modified.
        """
        rows, cols = state.shape
        blank_row, blank_col = self._find_blank(state)

        new_states = []
        for d_row, d_col in self._MOVES:
            row, col = blank_row + d_row, blank_col + d_col
            if 0 <= row < rows and 0 <= col < cols:
                successor = np.copy(state)
                # Swap the blank with the neighbouring tile.
                successor[blank_row, blank_col] = state[row, col]
                successor[row, col] = state[blank_row, blank_col]
                new_states.append(successor)

        return new_states

    def reached_goal(self, state):
        """Return True when ``state`` matches the goal board exactly."""
        return bool(np.array_equal(state, self.goal_state))

In [67]:
class Agent:
    """Best-first graph-search agent driven by a heuristic.

    Nodes are expanded in increasing order of ``path cost + heuristic``,
    where each move costs 1.

    Attributes:
        frontier: PriorityQueue of nodes waiting to be expanded.
        explored: Maps ``hash(node)`` to every node already expanded.
        start_state: Board the search starts from.
        goal_state: Goal board as passed by the caller (the search itself
            asks ``env`` for the goal).
        env: Environment providing ``get_next_states``, ``reached_goal``
            and ``get_goal_state``.
        goal_node: Node that reached the goal, or None until ``run``
            finds one.
        heuristic: Callable ``heuristic(state, goal_state)`` returning a
            numeric estimate.
    """

    def __init__(self, start_state, goal_state, env, heuristic):
        self.frontier = PriorityQueue()
        self.explored = dict()
        self.start_state = start_state
        self.goal_state = goal_state
        self.env = env
        self.goal_node = None
        self.heuristic = heuristic

    def run(self):
        """Search from the start state until a goal state is expanded.

        On success prints "Reached goal!" and stores the goal node in
        ``self.goal_node``; otherwise ``self.goal_node`` is left as is.
        """
        goal_state = self.env.get_goal_state()

        init_node = Node(parent = None, state = self.start_state, cost = 0)
        # Moves taken so far are tracked separately from ``cost`` so the
        # heuristic is added exactly once per node instead of piling up
        # along the path.
        init_node.path_cost = 0
        self.frontier.push(init_node)

        while not self.frontier.is_empty():

            curr_node = self.frontier.pop()

            key = hash(curr_node)
            if key in self.explored:
                continue

            self.explored[key] = curr_node

            if self.env.reached_goal(curr_node.state):
                print("Reached goal!")
                self.goal_node = curr_node
                break

            # Successors are generated only for nodes that are actually
            # expanded.
            path_cost = curr_node.path_cost + 1
            for state in self.env.get_next_states(curr_node.state):
                hcost = self.heuristic(state, goal_state)
                node = Node(parent=curr_node, state=state, cost=path_cost+hcost)
                node.path_cost = path_cost
                self.frontier.push(node)

    def print_nodes(self):
        """Print the states from the start to the goal, one step each.

        Prints nothing when no goal node has been found.
        """
        path = []
        node = self.goal_node
        while node is not None:
            path.append(node)
            node = node.parent

        # The chain was collected goal-first; show it start-first.
        for step, node in enumerate(reversed(path), start=1):
            print("Step: ",step)
            print(node)

        
    

The environment generates its own start state by applying `depth` random moves to the goal state, so we only input the depth and the goal state to the environment

In [111]:
# Number of random moves used to scramble the goal board into the start board.
depth = 100

# Target board; '_' marks the blank. NumPy stores these mixed int/str
# entries as strings, so every tile is handled as text.
goal_state = np.array([
    [1, 2, 3],
    [8, '_', 4],
    [7, 6, 5],
])

env = Environment(depth=depth, goal_state=goal_state)

In [112]:
# Show the scrambled start board followed by the target board.
print("Start State: ")
print(env.get_start_state())
print("Goal State: ")
print(goal_state)

Start State: 
[['6' '4' '3']
 ['1' '2' '7']
 ['_' '5' '8']]
Goal State: 
[['1' '2' '3']
 ['8' '_' '4']
 ['7' '6' '5']]


In [113]:
def heuristic0(curr_state, goal_state):
    """Return a constant estimate of 0, ignoring both boards.

    Args:
        curr_state: Board being evaluated (unused).
        goal_state: Target board (unused).

    Returns:
        Always 0, so this heuristic adds nothing to a node's cost.
    """
    return 0

In [114]:
def heuristic1(curr_state, goal_state, blank='_'):
    """Count the tiles that are not in their goal position.

    The blank is not a tile and is skipped. Counting it would report 2
    for a board that is a single move from the goal, overestimating the
    remaining cost.

    Args:
        curr_state: 2-D board being evaluated.
        goal_state: 2-D target board of the same shape.
        blank: Marker used for the empty cell.

    Returns:
        Number of non-blank cells whose value differs from the goal, as
        an int.
    """
    rows, cols = curr_state.shape

    count = 0
    for i in range(rows):
        for j in range(cols):
            tile = curr_state[i, j]
            if tile != blank and tile != goal_state[i, j]:
                count += 1

    return count

In [115]:
def heuristic2(curr_state, goal_state, blank='_'):
    """Sum the Manhattan distances of all tiles to their goal cells.

    The blank is skipped: including its displacement would report 2 for
    a board that is a single move from the goal, overestimating the
    remaining cost.

    Args:
        curr_state: 2-D board being evaluated.
        goal_state: 2-D target board containing the same tiles.
        blank: Marker used for the empty cell.

    Returns:
        Total row-plus-column displacement of the non-blank tiles, as an
        int.

    Raises:
        IndexError: If a tile of ``curr_state`` does not occur in
            ``goal_state``.
    """
    rows, cols = curr_state.shape

    dist = 0
    for i in range(rows):
        for j in range(cols):
            ele = curr_state[i, j]
            if ele == blank:
                continue
            # First occurrence of this tile in the goal board.
            goal_i, goal_j = np.where(goal_state == ele)
            dist += abs(int(goal_i[0]) - i) + abs(int(goal_j[0]) - j)

    return dist

In [118]:
# Create the search agent on this environment's start and goal boards, guided by heuristic1.
agent = Agent(env.get_start_state(), env.get_goal_state(), env, heuristic1)

In [119]:
# Run the search; it prints "Reached goal!" when a goal state is expanded.
agent.run()

Reached goal!
