Next cell runs a simple reflex agent program in a partially observable environment. Output depicts all the steps in a 50 iteration run with 3 items of each kind placed randomly in the environment 

In [67]:
#Import the libraries used for constructing this agent and environment
from agents import *
from random import *
import numpy as np

#Create the classes of the objects placed in the enviroments
class Treasure1(Thing):
    pass

class Treasure2(Thing):
    pass

class DisposTool(Thing):
    pass

class ReuseTool(Thing):
    pass

class Wall(Thing):
    pass



#Create a partially observable class for the environment
class PartialIsland(Environment):
    def __init__(self, width=7, height=7): #Default shape of the environment is square 7x7
        super(PartialIsland, self).__init__()

        self.width = width
        self.height = height
    
    def percept(self, agent): #returns a list of objects that the given agent can sense
        self.percepts = []
        
        locations = self.getPartialLocations(agent.location) #coordinates used to fetch the objects perceivable by the agent

        for locus in locations:
            things = self.list_things_at(locus)
            for thing in things:
                self.percepts.append(thing)
        return self.percepts
    
    def getPartialLocations(self, agent_locus): #returns a list of coordinates of the cells adjacent to the current agent's position
        here = [agent_locus[0], agent_locus[1]]
        up = [agent_locus[0] - 1, agent_locus[1]]
        upright = [agent_locus[0] - 1, agent_locus[1] + 1]
        right = [agent_locus[0], agent_locus[1] + 1]
        downright = [agent_locus[0] + 1, agent_locus[1] + 1]
        down = [agent_locus[0] + 1, agent_locus[1]]
        downleft = [agent_locus[0] + 1, agent_locus[1] - 1]
        left = [agent_locus[0], agent_locus[1] - 1]
        upleft = [agent_locus[0] - 1, agent_locus[1] - 1]
        
        locations = [here, up, upright, right, downright, down, downleft, left, upleft]
        return locations
    
            
    def execute_action(self, agent, action): #this function is called when running the environment after the agent's program returns an action
        '''changes the state of the environment based on what the agent does.'''
        
        if action == 'moveRandom':
            direction = randint(1, 4)
            print("SELECTED ACTION: Random")
            if direction == 1:
                action = 'moveRight'
            elif direction == 2:
                action = 'moveLeft'
            elif direction == 3:
                action = 'moveUp'
            elif direction == 4:
                action = 'moveDown'
                '''This module handles the case when the agent percieved only walls or empty spaces in the cells adjacent to it. The agent in this
                case decides to prepare a random movement action'''

        if action == 'moveRight':
            if agent.location[1] < 6:
                walls = self.list_things_at([agent.location[0], agent.location[1]+1], tclass=Wall)
                if len(walls) == 0:
                    agent.moveRight()
                    print("SELECTED ACTION: Right")
                else:
                    agent.NoOp()
            else:
                print("SELECTED ACTION: canceled, tried to move out of bounds")
                agent.performance -= 5
                '''This module handles the case when the agent decided to move right. First a check is performed to see if the agent decided to move out
                of bounds, in this case the agent's location does not change in the environment and 5 points are deducted from the performance for the
                agent's intent. If the move was in-bounds then it is checked wether or not the destination cell contains a wall; if this is the case the
                agent's decision to move is changed to standing still, otherwise the agent moves and its location is updated in the env successfully'''
                        
        elif action == 'moveLeft':
            if agent.location[1] > 1:
                walls = self.list_things_at([agent.location[0], agent.location[1]-1], tclass=Wall)
                if len(walls) == 0:
                    agent.moveLeft()
                    print("SELECTED ACTION: Left")
                else:
                    agent.NoOp()
            else:
                print("SELECTED ACTION: canceled, tried to move out of bounds")
                agent.performance -= 5
                '''This module handles the case when the agent decided to move left. It follows the same logic as the previous module'''
        
        elif action == 'moveUp':
            if agent.location[0] > 1:
                walls = self.list_things_at([agent.location[0] - 1, agent.location[1]], tclass=Wall)
                if len(walls) == 0:
                    agent.moveUp()
                    print("SELECTED ACTION: Up")
                else:
                    agent.NoOp()
            else:
                print("SELECTED ACTION: canceled, tried to move out of bounds")
                agent.performance -= 5
                '''This module handles the case when the agent decided to move up. It follows the same logic as the previous module'''
                
        elif action == 'moveDown':
            if agent.location[0] < 6:
                walls = self.list_things_at([agent.location[0] + 1, agent.location[1]], tclass=Wall)
                if len(walls) == 0:
                    agent.moveDown()
                    print("SELECTED ACTION: Down")
                else:
                    agent.NoOp()
            else:
                print("SELECTED ACTION: canceled, tried to move out of bounds")
                agent.performance -= 5
                '''This module handles the case when the agent decided to move down. It follows the same logic as the previous module'''
                
        elif action == "Greuse":
            items = self.list_things_at(agent.location, tclass=ReuseTool)
            if len(items) != 0:
                if agent.greuse(items[0]):
                    self.delete_thing(items[0])
                    self.matrix[items[0].location[0]][items[0].location[1]] = '-'
                    '''This module handles the case when the agent decides to grab a reusable tool. First a confirmation that a reusable tool object is
                    indeed in the current agent's location is performed. Upon success in the check, the agent picks up the tool, then it is deleted from
                    the env (internally from the array of things in the env) and finally the graphic representation of the env is modified accordingly'''
        
        elif action == "Gdispos":
            agent.gdispos()
            items = self.list_things_at(agent.location, tclass=DisposTool)
            if len(items) != 0:
                if agent.gdispos(items[0]):
                    self.delete_thing(items[0])
                    self.matrix[items[0].location[0]][items[0].location[1]] = '-'
                    '''This module handles the case when the agent decides to grab a disposable tool. It follows the same logic as the previous module'''
        
        elif action == "GTreasure1":
            items = self.list_things_at(agent.location, tclass=Treasure1)
            if len(items) != 0:
                if agent.gTreasure1(items[0]): 
                    self.delete_thing(items[0])
                    self.matrix[items[0].location[0]][items[0].location[1]] = '-'
                    '''This module handles the case when the agent decides to grab a type 1 treasure. It follows the same logic as the previous module'''
        
        elif action == "GTreasure2":
            items = self.list_things_at(agent.location, tclass=Treasure2)
            if len(items) != 0:
                if agent.gTreasure2(items[0]):
                    self.delete_thing(items[0])
                    self.matrix[items[0].location[0]][items[0].location[1]] = '-'
                    '''This module handles the case when the agent decides to grab a type 2 treasure. It follows the same logic as the previous module'''
        
        elif action == "NoOp":
            pass
        
        #Report the modified environment along with agent status
        print("NEW AGENT'S PERFORMANCE: " + str(agent.performance))
        print("NEW ENVIRONMENT STATE")
        print("Agent location: " + str(agent.location))
        print("Agent tools: " + str(agent.holding))
        print('\n'.join([''.join(['{:3}'.format(item) for item in row]) 
              for row in self.matrix]))
    
    def run(self, steps=50): #AIMA function overriden to create a graphical representation of the environment and report starting status
        "Run the Environment for given number of time steps."
        print("SIMPLE REFLEX AGENT in PARTIALLY OBSERVABLE ENVIRONMENT")
        print("<STARTING>")
        print("Agent location: " + str(self.things[0].location))
        print("Agent tools: " + str(self.things[0].holding))
        
        self.matrix = templateEnv(size = 6) #create an empty matrix for representing the environment graphically
        self.matrix = fillEnv(self.matrix, self.things) #fill the matrix with objects in the environment
        print("Agent performance: " + str(self.things[0].performance))
        
        for step in range(steps):
            if self.is_done():
                print("\nFINAL AGENT's PERFORMANCE: " + str(self.things[0].performance)) #print the performance after last step of the run
                return
            print("\n<STEP" + str(step + 1) + ">") #print step number starting from 1
            self.step()
        
        print("\nFINAL AGENT's PERFORMANCE: " + str(self.things[0].performance)) #print the performance after last step of the run
        
    def step(self): #AIMA function overriden to access agent status in each step of a run and print the performance
        """Run the environment for one time step. If the
        actions and exogenous changes are independent, this method will
        do.  If there are interactions between them, you'll need to
        override this method."""
        if not self.is_done():
            actions = []
            for agent in self.agents:
                if agent.alive:
                    actions.append(agent.program(agent, self.percept(agent)))
                else:
                    actions.append("")
                
            #Print a graphical matrix representation of what the agent was percieving along with its status at that moment
            print("PERCEPT")
            print("Agent location: " + str(agent.location))
            print("Agent tools: " + str(agent.holding))
            percept_repr = templateEnv(size = 6)
            percept_repr = getUnknowns(percept_repr, agent.location)
            fillEnv(percept_repr, self.percepts)
                
            for (agent, action) in zip(self.agents, actions):
                self.execute_action(agent, action)
            self.exogenous_change()
            
    def is_done(self): #AIMA function overriden to terminate the run if there are no objects of interest left in the environment
        no_edibles = not any(isinstance(thing, Treasure1) or isinstance(thing, DisposTool) or isinstance(thing, ReuseTool) or isinstance(thing, Treasure2) for thing in self.things)
        dead_agents = not any(agent.is_alive() for agent in self.agents)
        return dead_agents or no_edibles
    
    
#Create a simple reflex agent for use in an island environment
class ReflexHunter(Agent):
    def __init__(self, program=None): #default state of the agent
        self.alive = True
        self.bump = False
        self.holding = []
        self.performance = 50
        if program is None:
            def program(percept):
                return eval(input('Percept={}; action? ' .format(percept)))
        assert isinstance(program, collections.Callable)
        self.program = program
        
    #The following 4 methods are actions that the agent executes when it wants to move. Which move of the four is called depends on the calculations
    #made in the reflexProgram() function and on wether or not the action is valid in the current state of the environment as seen in execute_action()
    #in the environment's methods. Performance is reduced by one point in all cases
    def moveRight(self):
        self.performance -= 1
        self.location[1] += 1

    def moveLeft(self):
        self.performance -= 1
        self.location[1] -= 1

    def moveUp(self):
        self.performance -= 1
        self.location[0] -= 1

    def moveDown(self):
        self.performance -= 1
        self.location[0] += 1

    #The following 4 functions are actions that the agent executes when it wants to grab something. They operate in the a similar fashion as the
    #movement methods except that the agent's inventory is also manipulated
    def greuse(self, thing):
        self.performance -= 1
        if isinstance(thing, ReuseTool):
            print("SELECTED ACTION: Greuse")
            self.holding.append('H')
            return True
        return False

    def gdispos(self, thing = None):
        self.performance -= 1
        if isinstance(thing, DisposTool):
            print("SELECTED ACTION: Gdispos")
            self.holding.append('h')
            return True
        return False

    def gTreasure1(self, thing):
        if isinstance(thing, Treasure1):
            print("SELECTED ACTION: GTreas1")
            self.performance += 20
            if 'H' in self.holding == False:
                self.holding.remove('h')
            return True
        return False

    def gTreasure2(self, thing):
        if isinstance(thing, Treasure2):
            print("SELECTED ACTION: GTreas2")
            self.performance += 40
            self.holding.remove('h')
            return True
        return False
    
    #This action is performed when there is a wall at a cell where the agent wants to move to. Mantains the agent's location for the current step
    def NoOp(self):
        print("SELECTED ACTION: NoOp saw a wall")
        return False
    
    
def reflexProgram(agent, percepts):
    '''returns a string dictating the action to be performed by the agent and ultimately what needs to be modified in environment'''
    actionTaken = False
    
    for p in percepts:
        if actionTaken:
            break
            
        in_location = agent.location == p.location #used to check the agent's current location for a grabable object
        
        if isinstance(p, Treasure1) and ('h' in agent.holding or 'H' in agent.holding):
            if in_location:
                actionTaken = True
                return 'GTreasure1'
            else:
                moveTo = getDirection(agent.location, p.location)
                actionTaken = True
                return moveTo
                '''This module handles when the agent is able to sense a type 1 treasure. First check if the agent has a any kind of tool to grab the
                treasure: if not then let this iteration of the loop pass because there is no way to obtain the treasure right now (a new percept is
                checked in the next iteration), if yes check if the treasure is at the same place as the agent, if not move towards it by calling the
                getDirection() function, if yes output the grab action'''
        
        elif isinstance(p, Treasure2) and 'h' in agent.holding:
            if in_location:
                actionTaken = True
                return 'GTreasure2'
            else:
                moveTo = getDirection(agent.location, p.location)
                actionTaken = True
                return moveTo
                '''This module handles when the agent is able to sense a type 2 treasure. It works in the same fashion as the last module with the
                difference being that only disposable tools on inventory are considered in the first condition check and not any kind of tool as in
                the previous module'''
            
        elif isinstance(p, DisposTool):
            if in_location:
                actionTaken = True
                return 'Gdispos'
            else:
                moveTo = getDirection(agent.location, p.location)
                actionTaken = True
                return moveTo
                '''This module handles when the agent is able to sense a disposable tool. I simply checks if the tool is in the current location of the
                agent to pick it up or otherwise move towards it calling the getDirection() function'''
            
        elif isinstance(p, ReuseTool):
            if in_location:
                actionTaken = True
                return 'Greuse'
            else:
                moveTo = getDirection(agent.location, p.location)
                actionTaken = True
                return moveTo
                '''This module handles when the agent is able to sense a reusable tool. Works the same as the last module'''
    
    if not actionTaken:
        return 'moveRandom'
    
def getDirection(origin, goal):
    '''Decides an action that will move the agent one cell closer (in manhatan distance) to an object it wants to pick up'''
    if origin[1] < goal[1]:
        return 'moveRight'
    elif origin[1] > goal[1]:
        return 'moveLeft'
    elif origin [0] > goal[0]:
        return 'moveUp'
    else:
        return 'moveDown'
    
def templateEnv(size):
    '''Creates an empty matrix to represent an enviroment or agent's percept'''
    matrix = np.array([['-' for i in range (0, size + 2)] for j in range (0, size + 2)])
    
    #Make the matrix pretty with outer rol and col aesthetics
    for i in range(0, size + 2):
        for j in range(0, size + 2):
            if (i == 0 and j == 0) or (i == size + 1 and j == size + 1):
                matrix[i][j] = '\\'

            elif (i == 0 and j == size + 1) or (i == size + 1 and j == 0):
                matrix[i][j] = '/'

            elif i == 0 and (j != 0 or j!= size + 1):
                matrix[i][j] = str(j)
            
            elif i == size + 1 and (j != 0 or j!= size + 1):
                matrix[i][j] = str(j)

            elif (i != 0 or i != size + 1) and j == 0:
                matrix[i][j] = str(i)
            
            elif (i != 0 or i != size + 1) and j == size + 1:
                matrix[i][j] = str(i)
    return matrix

def fillEnv(matrix, things):
    '''fills an empty matrix with objects in the environment at their proper locations'''
    
    for thing in things:
        if isinstance(thing, Treasure1):
            matrix[thing.location[0]][thing.location[1]] = 'T'
        elif isinstance(thing, Treasure2):
            matrix[thing.location[0]][thing.location[1]] = 't'
        elif isinstance(thing, DisposTool):
            matrix[thing.location[0]][thing.location[1]] = 'h'
        elif isinstance(thing, ReuseTool):
            matrix[thing.location[0]][thing.location[1]] = 'H'
        elif isinstance(thing, Wall):
            matrix[thing.location[0]][thing.location[1]] = 'X'
        
    print('\n'.join([''.join(['{:3}'.format(item) for item in row]) 
           for row in matrix]))
    return matrix
    
def getUnknowns(matrix, reference):
    '''modifies the graphical representation of the agent's percept to reflect the cells it can't sense (for partial env)'''
    for i in range(1, len(matrix) - 1):
            for j in range(1, len(matrix) - 1):
                
                if [i, j] != reference:
                    if [i+1, j] != reference and [i-1, j] != reference:
                        if [i, j+1] != reference and [i, j-1] != reference:
                            if [i+1, j+1] != reference and [i-1, j-1] != reference:
                                if [i+1, j-1] != reference and [i-1, j+1] != reference:
                                    matrix[i][j] = '?'
    return matrix


def main():
    '''Run the simple reflex hunter in the partial treasure island environment'''
    partial_island = PartialIsland()
    mike = ReflexHunter(reflexProgram)

    #Create 2 objects of each kind to place on the environment
    treasure1A = Treasure1()
    treasure1B = Treasure1()
    treasure1C = Treasure1()
    treasure2A = Treasure2()
    treasure2B = Treasure2()
    treasure2C = Treasure2()
    dispos1 = DisposTool()
    dispos2 = DisposTool()
    dispos3 = DisposTool()
    reusable1 = ReuseTool()
    reusable2 = ReuseTool()
    reusable3 = ReuseTool()
    wall = Wall()
    wall2 = Wall()
    wall3 = Wall()

    partial_island.add_thing(mike, [1,1])
    mike.performance = 50 #prevents AIMA from resetting the performance to 0 when adding the agent to the environment
    
    #add things preventing placing more than one item in a single cell in the environment
    xy = [[1, 1],[1, 2],[1, 3],[1, 4],[1, 5],[1, 6],
          [2, 1],[2, 2],[2, 3],[2, 4],[2, 5],[2, 6],
          [3, 1],[3, 2],[3, 3],[3, 4],[3, 5],[3, 6],
          [4, 1],[4, 2],[4, 3],[4, 4],[4, 5],[4, 6],
          [5, 1],[5, 2],[5, 3],[5, 4],[5, 5],[5, 6],
          [6, 1],[6, 2],[6, 3],[6, 4],[6, 5],[6, 6]]

    partial_island.add_thing(treasure1A, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(treasure1B, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(treasure1C, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(reusable1, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(reusable2, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(reusable3, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(treasure2A, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(treasure2B, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(treasure2C, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(dispos1, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(dispos2, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(dispos3, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(wall, xy.pop(randint(0, len(xy)) - 1))
    partial_island.add_thing(wall2, xy.pop(randint(0, len(xy) - 1)))
    partial_island.add_thing(wall3, xy.pop(randint(0, len(xy) - 1)))

    partial_island.run(50) #run the program for 50 iterations
    return mike.performance #for studying several runs
                                          
main()

SIMPLE REFLEX AGENT in PARTIALLY OBSERVABLE ENVIRONMENT
<STARTING>
Agent location: [1, 1]
Agent tools: []
\  1  2  3  4  5  6  /  
1  t  -  -  T  -  t  1  
2  H  h  -  -  X  h  2  
3  -  H  T  T  -  -  3  
4  -  -  -  X  -  -  4  
5  t  h  -  -  -  -  5  
6  -  -  H  -  X  -  6  
/  1  2  3  4  5  6  \  
Agent performance: 50

<STEP1>
PERCEPT
Agent location: [1, 1]
Agent tools: []
\  1  2  3  4  5  6  /  
1  t  -  ?  ?  ?  ?  1  
2  H  h  ?  ?  ?  ?  2  
3  ?  ?  ?  ?  ?  ?  3  
4  ?  ?  ?  ?  ?  ?  4  
5  ?  ?  ?  ?  ?  ?  5  
6  ?  ?  ?  ?  ?  ?  6  
/  1  2  3  4  5  6  \  
SELECTED ACTION: Right
NEW AGENT'S PERFORMANCE: 49
NEW ENVIRONMENT STATE
Agent location: [1, 2]
Agent tools: []
\  1  2  3  4  5  6  /  
1  t  -  -  T  -  t  1  
2  H  h  -  -  X  h  2  
3  -  H  T  T  -  -  3  
4  -  -  -  X  -  -  4  
5  t  h  -  -  -  -  5  
6  -  -  H  -  X  -  6  
/  1  2  3  4  5  6  \  

<STEP2>
PERCEPT
Agent location: [1, 2]
Agent tools: []
\  1  2  3  4  5  6  /  
1  t  -  -  ?  ?  ?  1 

/  1  2  3  4  5  6  \  
SELECTED ACTION: Random
SELECTED ACTION: Up
NEW AGENT'S PERFORMANCE: 119
NEW ENVIRONMENT STATE
Agent location: [1, 1]
Agent tools: ['H', 'H', 'H']
\  1  2  3  4  5  6  /  
1  -  -  -  -  -  t  1  
2  -  -  -  -  X  h  2  
3  -  -  -  -  -  -  3  
4  -  -  -  X  -  -  4  
5  -  -  -  -  -  -  5  
6  -  -  -  -  X  -  6  
/  1  2  3  4  5  6  \  

FINAL AGENT's PERFORMANCE: 119


119

Next cell executes 100 runs of 50 iterations each of the previous agent and environment. The average agent performance of the runs is computed

In [68]:
%%capture
total_performance = 0
for i in range (1, 100):
    run_result = main();
    total_performance += run_result
average_performance = total_performance / 100

Next cell prints the average agent performance from the last experiment

In [69]:
average_performance

77.43