In [None]:
!pip install agentpy

Collecting agentpy
  Downloading agentpy-0.1.5-py3-none-any.whl.metadata (3.3 kB)
Collecting SALib>=1.3.7 (from agentpy)
  Downloading salib-1.5.1-py3-none-any.whl.metadata (11 kB)
Collecting multiprocess (from SALib>=1.3.7->agentpy)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill>=0.3.9 (from multiprocess->SALib>=1.3.7->agentpy)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Downloading agentpy-0.1.5-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.9/53.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading salib-1.5.1-py3-none-any.whl (778 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.9/778.9 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.17-py311-none-any.whl (144 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.3/144.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.9-py3-non

In [None]:
import agentpy as ap
import numpy as np
import random, json
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns, IPython
from matplotlib import pyplot as plt, cm

#######################################
# CLASS FOR AGENT
#######################################
class MazeAgent(ap.Agent):
    '''
    Maze-walking agent: greedy on the Manhattan distance to the goal, with backtracking.

    Attributes initialized in setup():
    - actions: 4 possible actions, each mapped to a (row, column) offset
    - env: reference to its environment
    - history: stack of positions used to backtrack
    - visited: positions the agent has already stood on
    '''
    def setup(self):
        '''
        Initialize the action table, the environment reference and the exploration memory.
        '''
        # Actions are linked to a movement in the grid.
        self.actions = {'up': (-1,0), 'down': (1, 0), 'left': (0, -1), 'right': (0, 1)}
        self.env = self.model.env
        self.history = [] # Will keep track of movements to backtrack if necessary
        self.visited = [] # Will keep track of visited coordinates to prioritize new paths

    @staticmethod
    def _neighbour(position, offset):
        '''
        Return the coordinate reached from `position` by applying the (row, column) `offset`.
        '''
        return (position[0] + offset[0], position[1] + offset[1])

    def execute(self):
        '''
        Perform one move of the agent.

        Each step moves to the valid, unvisited neighbour with the smallest cost
        (cost already stored for the current cell + Manhattan distance to the goal).
        If there is no such neighbour the agent backtracks one position; if there is
        nothing to backtrack to, it moves to a random valid neighbour.
        '''
        curr_pos = self.get_position()
        goal = self.p.goal

        # None marks "no candidate found". A coordinate sentinel such as (0, 0)
        # would make that cell impossible to select as a destination.
        next_move = None
        next_move_cost = np.inf

        # Check possible movements
        for offset in self.actions.values():
            new_pos = self._neighbour(curr_pos, offset)
            if not self.env.is_valid(new_pos) or new_pos in self.visited:
                continue

            # The first term is the same for every neighbour, so candidates are
            # effectively ranked by their Manhattan distance to the goal.
            distance = abs(new_pos[0] - goal[0]) + abs(new_pos[1] - goal[1])
            cost = self.env.costs[curr_pos] + distance

            if cost < next_move_cost:
                next_move_cost = cost
                next_move = new_pos

        if next_move is not None:
            # There is a valid next move: take it and remember where we came from
            self.env.move_to(self, next_move)
            self.history.append(curr_pos)
        elif self.history:
            # Dead end: backtrack to the previous position
            next_move = self.history.pop()
            self.env.move_to(self, next_move)
        else:
            # No history (stuck at the start): explore randomly, but only through
            # cells that are actually walkable so the agent never enters a wall.
            open_actions = [
                name for name, offset in self.actions.items()
                if self.env.is_valid(self._neighbour(curr_pos, offset))
            ]
            if open_actions:
                action = self.choose_action(open_actions)
                next_move = self._neighbour(curr_pos, self.actions[action])
                self.env.move_to(self, next_move)
                print(f"Randomly moving to {next_move}")
            else:
                # Completely walled in: the agent stays where it is.
                next_move = curr_pos

        if curr_pos not in self.visited:
            self.visited.append(curr_pos)

        # Relax the stored cost of the cell the agent ended up on.
        curr_cost = self.env.costs[curr_pos] + 1
        if curr_cost < self.env.costs[next_move]:
            self.env.costs[next_move] = curr_cost

    def get_position(self):
        '''
        Get position of agent in environment
        '''
        return self.env.positions[self]

    def choose_action(self, candidates=None):
        '''
        Choose a random action name.

        candidates: optional iterable of action names to pick from; when omitted,
        all entries of self.actions are eligible.
        '''
        options = list(self.actions.keys()) if candidates is None else list(candidates)
        return random.choice(options)



#######################################
# CLASS FOR ENVIRONMENT
#######################################
class Maze(ap.Grid):
    '''
    Grid environment wrapping the maze layout given in the model parameters.

    Attributes initialized in setup():
    - maze: the layout array from the parameters (is_valid treats cells equal to 0 as walkable)
    - rewards: an independent copy of the layout, consumed by get_reward
    - costs: per-cell cost array, infinite everywhere except 0 at the start position
    '''
    def setup(self):
        '''
        Initialize the maze environment from the parameters `maze` and `init`.
        '''
        self.maze = self.p.maze
        # A basic slice such as maze[:, :] is only a view onto the same data, so
        # clearing a reward would also rewrite the maze itself. Take a real copy.
        self.rewards = self.maze.copy()
        self.costs = np.full(self.maze.shape, np.inf)
        self.costs[self.p.init] = 0

    def get_reward(self, state):
        '''
        Reward function: return the reward stored at `state`.

        Any value other than 0 or -1 is reset to 0 after being read, so it can
        only be collected once.
        '''
        reward = self.rewards[state]
        if reward != 0 and reward != -1:
            self.rewards[state] = 0
        return reward

    def is_valid(self, position):
        '''
        Return True when `position` (row, column) lies inside the maze and the cell equals 0.
        '''
        row, col = position[0], position[1]
        n_rows, n_cols = self.maze.shape[0], self.maze.shape[1]
        # Bounds are checked first so a negative index never wraps around the array.
        if not (0 <= row < n_rows and 0 <= col < n_cols):
            return False
        return bool(self.maze[row][col] == 0)


#######################################
# CLASS FOR THE SYSTEM (Ag, Env)
#######################################
class MazeModel(ap.Model):
    '''
    Model tying together one Maze environment and one MazeAgent.

    Reads the parameters `maze`, `init` and `goal`.
    '''
    def setup(self):
        '''
        Create the environment and place the single agent on the start position.
        '''
        # Take the shape from this model's own parameters rather than from a
        # notebook-level variable, so the model does not rely on hidden state.
        self.env = Maze(self, shape=self.p.maze.shape)
        self.agent = MazeAgent(self)
        self.env.add_agents([self.agent], positions=[self.p.init])
        self.agent.reward = 0

    def step(self):
        '''
        Let the agent perform one move.
        '''
        self.agent.execute()

    def update(self):
        '''
        Stop the simulation once the agent stands on the goal.
        '''
        # tuple() on both sides keeps the comparison working when either
        # coordinate is supplied as a list instead of a tuple.
        if tuple(self.agent.get_position()) == tuple(self.p.goal):
            print('ending, goal reached')
            self.stop()




####################################################
# EXECUTION AND VISUALIZATION OF SPECIFIC INSTANCES
####################################################
def animation_plot(model, ax):
    '''
    Draw one animation frame of the maze on `ax`.

    model: object exposing p.maze, p.goal, env.agents and env.positions
    ax: matplotlib axes the frame is drawn on

    Cell codes used for colouring: -1 wall (black), 0 free (white),
    2 goal (green), 3 agent (blue).
    '''
    color_dict = {0:'#ffffff', -1:'#000000', 3:'#0000ff', 2:'#00ff00', 1:'#ffff00'}

    # Build the frame on a fresh array so the maze parameter is never modified.
    grid = np.zeros(model.p.maze.shape)
    grid[model.p.maze == -1] = -1
    grid[model.p.goal] = 2

    # The agent is written last so it stays visible when standing on the goal.
    agent = next(iter(model.env.agents))
    grid[model.env.positions[agent]] = 3

    # Draw once, with the agent already in place; an extra draw without the
    # agent would just be covered by this one.
    ap.gridplot(grid, ax=ax, color_dict=color_dict, convert=True)
    ax.set_title("Maze Agent\nWall hits: {}".format(-agent.reward))


# Maze layout, hard-coded as a 20x20 array: -1 marks a wall, 0 a free cell
# (Maze.is_valid only accepts cells equal to 0).
maze = np.array([[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1],
                 [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1],
                 [-1, -1, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, -1, 0, -1],
                 [-1, 0, 0, -1, -1, -1, -1, -1, 0, 0, 0, 0, -1, 0, 0, -1, -1, -1, 0, -1],
                 [-1, 0, 0, 0, 0, -1, -1, -1, 0, -1, -1, -1, -1, 0, 0, 0, -1, 0, 0, -1],
                 [-1, -1, -1, 0, -1, -1, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, -1],
                 [-1, 0, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0, -1, -1, 0, 0, -1, -1, 0, -1],
                 [-1, -1, 0, -1, 0, 0, 0, 0, -1, -1, -1, 0, 0, -1, -1, 0, 0, 0, 0, -1],
                 [-1, 0, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 0, 0, 0, -1],
                 [-1, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1],
                 [-1, 0, 0, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, -1],
                 [-1, -1, 0, -1, -1, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, -1, -1],
                 [-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, -1, -1, 0, -1],
                 [-1, 0, 0, -1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0, -1],
                 [-1, 0, -1, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1],
                 [-1, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1],
                 [-1, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, 0, -1],
                 [-1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, 0, 0, 0, 0, -1, -1, 0, -1],
                 [-1, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1],
                 [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1]])
# Disabled alternative: load the layout from a .npy file (negated on load).
#maze = -np.load('maze_example.npy')
n = len(maze)  # number of rows in the maze

# Start position is expressed relative to the bottom-right corner, using the
# row count for the row and the column count for the column so it also holds
# for non-square mazes. For the 20x20 maze above this is (18, 17).
n_rows, n_cols = maze.shape

parameters = {
    'maze': maze,
    'init': (n_rows - 2, n_cols - 3),
    'goal': (8, 10),
    'steps': 100
}

fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(111)
mazeModel = MazeModel(parameters)
# Named maze_animation so the imported matplotlib.animation module
# (bound to the name `animation`) is not overwritten.
maze_animation = ap.animate(mazeModel, fig, ax, animation_plot)
IPython.display.HTML(maze_animation.to_jshtml())

ending, goal reached
