# AGENT2: L'agent qui simulait longtemps (en construction)

Ce notebook présente un agent qui simule plusieurs cycles à l'avance.

# L'environnement Small Loop

Ajoutons la méthode `clone()` pour permettre de faire plusieurs simulations

In [105]:
# Directory that receives the PNG frames written by SmallLoop.save()
save_dir = "sav"

# Action codes understood by SmallLoop.outcome(), numbered consecutively from 0
(
    FORWARD,
    FEEL_FRONT,
    FEEL_LEFT,   # Not used
    FEEL_RIGHT,  # Not used
    TURN_LEFT,   # Turns the agent by 180 degrees
    TURN_RIGHT,  # Not used
) = range(6)

In [1102]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
from ipywidgets import Button, HBox,VBox, Output
from IPython.display import display

# Heading codes; each one selects a row of SmallLoop.directions
LEFT, DOWN, RIGHT, UP = range(4)

# Cell codes stored in the grids; the colormap maps each code to the palette
# entry at the same position
EMPTY, WALL, FEELING_EMPTY, FEELING_WALL, BUMPING = range(2, 7)

# Palette that hides the environment (empty and wall cells share one color).
# It is overridden by the assignment just below.
colors = [
    "#ffffff", "#dddddd", "#b0b0b0", "#b0b0b0", "#ffffff", "#535865", "#F93943",
]
# Palette actually in effect
colors = [
    "#eeeeee", "#85A48F", "#D6D6D6", "#5C946E", "#FAE2DB", "#535865", "#F93943",
]
# Color of the marker that represents the agent
agent_color = "#1976D2"

class SmallLoop:
    """One-row corridor environment: three empty cells enclosed by two walls.

    The instance holds the agent's position (row, column) and heading, the
    environment grid, and a two-row display grid: row 0 shows the environment
    plus the marks left by the last interactions, row 1 is used by
    `display()` to show an optional simulator.

    `display()` reads the notebook-level globals `out` (output widget) and
    `step` (cycle counter); `save()` reads the global `save_dir`.
    """

    def __init__(self, position, direction):
        """Create the environment.

        :param position: [row, column] of the agent; it is copied, so the
            caller's sequence is never modified.
        :param direction: heading of the agent (LEFT, DOWN, RIGHT or UP).
        """
        self.environment_grid = np.array([[WALL, EMPTY, EMPTY, EMPTY, WALL]])
        self.display_grid = np.zeros((2, self.environment_grid.shape[1]), dtype=int)
        self.display_grid[0, :] = self.environment_grid[0, :]
        self.position = np.array(position)
        self.direction = direction
        self.cmap = ListedColormap(colors)
        # One bin per cell code 0..6
        self.norm = BoundaryNorm([-0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5], self.cmap.N)
        self.marker_size = 400
        self.marker_map = {LEFT: '<', DOWN: 'v', RIGHT: '>', UP: '^'}
        self.marker_color = agent_color
        # (row, column) displacement for each heading code
        self.directions = np.array([
            [0, -1],  # Left
            [1, 0],   # Down
            [0, 1],   # Right
            [-1, 0]   # Up
            ])

    def _in_bounds(self, cell):
        """Return True if `cell` (row, column) lies inside the environment grid."""
        rows, cols = self.environment_grid.shape
        return 0 <= cell[0] < rows and 0 <= cell[1] < cols

    def _is_empty(self, cell):
        """Return True if `cell` is inside the grid and empty.

        Cells outside the grid count as walls. Without this check a negative
        row would silently wrap around to the corridor and a row past the end
        would raise IndexError.
        """
        return bool(self._in_bounds(cell) and self.environment_grid[tuple(cell)] == EMPTY)

    def _mark(self, cell, code):
        """Write `code` in the display grid at `cell`, ignoring cells outside the environment."""
        if self._in_bounds(cell):
            self.display_grid[tuple(cell)] = code

    def _feel(self, heading):
        """Probe the cell adjacent to the agent in `heading`; return 0 if empty, 1 otherwise."""
        cell = self.position + self.directions[heading]
        if self._is_empty(cell):
            self._mark(cell, FEELING_EMPTY)
            return 0
        self._mark(cell, FEELING_WALL)
        return 1

    def outcome(self, action):
        """Enact `action`, update the state and the display grid, and return the outcome.

        :param action: one of FORWARD, FEEL_FRONT, FEEL_LEFT, FEEL_RIGHT,
            TURN_LEFT, TURN_RIGHT. Any other value leaves the state untouched.
        :return: 1 when a move is blocked or a felt cell is not empty,
            0 otherwise (turns always return 0).
        """
        if action == FORWARD:
            target_position = self.position + self.directions[self.direction]
            if self._is_empty(target_position):
                self.position[:] = target_position
                return 0
            self._mark(target_position, BUMPING)
            return 1

        if action == TURN_RIGHT:
            self.direction = {LEFT: UP, DOWN: LEFT, RIGHT: DOWN, UP: RIGHT}[self.direction]
            return 0

        if action == TURN_LEFT:
            # U-turn: TURN_LEFT reverses the heading instead of rotating by 90 degrees
            self.direction = {LEFT: RIGHT, DOWN: UP, RIGHT: LEFT, UP: DOWN}[self.direction]
            return 0

        if action == FEEL_FRONT:
            return self._feel(self.direction)

        if action == FEEL_LEFT:
            return self._feel((self.direction + 1) % 4)

        if action == FEEL_RIGHT:
            return self._feel((self.direction - 1) % 4)

        return 0

    def display(self, simulator=None):
        """Display the grid in the notebook output widget `out`.

        :param simulator: optional second environment; when given, its agent
            is drawn in grey on row 1 and its grid is copied to that row.
        """
        out.clear_output(wait=True)
        with out:
            fig, ax = plt.subplots()
            if simulator is not None:
                ax.scatter(simulator.position[1], 1, s=self.marker_size,
                           marker=self.marker_map[simulator.direction], c="#aaaaaa")
                # Shift the cell codes by 2 so the simulator row uses palette entries 0 and 1
                self.display_grid[1, :] = simulator.environment_grid[0, :] - 2
            ax.imshow(self.display_grid, cmap=self.cmap, norm=self.norm)
            ax.scatter(self.position[1], self.position[0], s=self.marker_size,
                       marker=self.marker_map[self.direction], c=self.marker_color)
            # `step` is the notebook-level cycle counter
            ax.text(4.5, 0, f"{step:>3}", fontsize=12, color='White')
            plt.show()

    def save(self, step):
        """Save the display as the PNG file `<save_dir>/<step on 4 digits>.png`.

        :param step: cycle number, printed on the image and used as file name.
        """
        import os

        # savefig does not create missing directories
        os.makedirs(save_dir, exist_ok=True)
        fig, ax = plt.subplots()
        ax.set_xticks([])
        ax.set_yticks([])
        ax.axis('off')
        ax.imshow(self.display_grid, cmap=self.cmap, norm=self.norm)
        ax.scatter(self.position[1], self.position[0], s=self.marker_size,
                   marker=self.marker_map[self.direction], c=self.marker_color)
        ax.text(4.5, 0, f"{step:>4}", fontsize=12, color='White')
        plt.savefig(f"{save_dir}/{step:04}.png", bbox_inches='tight', pad_inches=0, transparent=True)
        # Close the figure so that repeated saves do not accumulate open figures
        plt.close(fig)

    def clear(self, clear):
        """Reset row 0 of the display grid to the bare environment when `clear` is truthy."""
        if clear:
            self.display_grid[0, :] = self.environment_grid[0, :]

    def clone(self):
        """Return a new environment with the same agent position and heading.

        The constructor copies the position, so acting on the clone never
        moves this instance. Marks in the display grid are not carried over.
        """
        return SmallLoop(self.position, self.direction)
        

# L'agent

In [1103]:
class Interaction:
    """An interaction is a tuple (action, outcome) with a valence.

    Two interactions are equal when they share the same key, i.e. the same
    action and outcome, whatever their valence. Hashing is consistent with
    equality, so interactions can be stored in sets and used as dict keys.
    """

    def __init__(self, action, outcome, valence):
        """Store the action code, the outcome code and the valence."""
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """Return the key used to find this interaction in a dictionary: the string '<action><outcome>'."""
        return f"{self.action}{self.outcome}"

    def __str__(self):
        """Return the interaction in the form '<action><outcome>:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __repr__(self):
        """Return an unambiguous representation showing the three constructor arguments."""
        return f"{type(self).__name__}({self.action!r}, {self.outcome!r}, {self.valence!r})"

    def __eq__(self, other):
        """Interactions are equal if they have the same key."""
        if not isinstance(other, Interaction):
            # Let Python try the reflected comparison instead of failing on other.key()
            return NotImplemented
        return self.key() == other.key()

    def __hash__(self):
        """Hash on the key so that equal interactions have equal hashes."""
        return hash(self.key())

L'agent est instancié avec un simulateur interne qui lui est passé en argument.

In [1104]:
import pandas as pd

class Agent:
    """Agent that selects, at each cycle, the action whose simulated outcome has the highest valence.

    The agent owns an internal simulator. Candidate actions are tried on
    clones of it; the action finally chosen is enacted on the simulator
    itself, so the simulator follows the agent's own decisions.
    """

    def __init__(self, _interactions, simulator):
        """Initialize the dictionary of interactions and the list of actions.

        :param _interactions: iterable of objects exposing `action`,
            `outcome`, `valence` and `key()`. It must contain the interaction
            whose key is "00", used as the initial intended interaction.
        :param simulator: object exposing `clone()` and `outcome(action)`.
        """
        self._interactions = {interaction.key(): interaction for interaction in _interactions}
        self._intended_interaction = self._interactions["00"]
        # Distinct actions in order of first appearance. This does not depend
        # on any module-level outcome constant.
        self._actions = list(dict.fromkeys(i.action for i in _interactions))
        # Table of candidate actions, completed and ranked by select_action()
        self.action_df = pd.DataFrame({"action": self._actions})
        self.simulator = simulator

    def select_action(self):
        """Select the action that yields the highest valence.

        Each candidate action is tried on a fresh clone of the simulator. The
        resulting table (action, outcome, valence), sorted by descending
        valence, is left in `self.action_df`.

        :return: the action on the first row of the sorted table.
        """
        # Roll the actions (last row moves first) to try different actions
        # when all valences are equal
        self.action_df = pd.concat(
            [self.action_df.tail(1), self.action_df.head(len(self.action_df) - 1)],
            ignore_index=True,
        )
        # Work on plain Python values: a row-wise apply() would upcast the
        # action codes to float as soon as a valence is a float, which would
        # produce keys such as "4.00.0"
        actions = self.action_df["action"].tolist()
        # Try every action in a clone of the simulator
        outcomes = [self.simulator.clone().outcome(action) for action in actions]
        # Record the expected valence of each resulting interaction
        valences = [
            self._interactions[f"{action}{outcome}"].valence
            for action, outcome in zip(actions, outcomes)
        ]
        self.action_df["outcome"] = outcomes
        self.action_df["valence"] = valences
        # Sort by descending valence; the stable sort keeps the rolled order among ties
        self.action_df = self.action_df.sort_values(
            by="valence", ascending=False, kind="stable"
        ).reset_index(drop=True)
        # Return the action that yields the highest valence
        return self.action_df.loc[0, "action"]

    def action(self, _outcome):
        """Trace the previous cycle and return the next action to enact.

        :param _outcome: outcome received for the previously intended action.
        :return: the action selected for the new cycle.
        """
        # Tracing the previous cycle
        previous_interaction = self._interactions[f"{self._intended_interaction.action}{_outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, Outcome: {_outcome}, "
              f"Prediction: {self._intended_interaction.outcome == _outcome}, Valence: {previous_interaction.valence})")

        # Computing the next interaction to try to enact
        intended_action = self.select_action()

        # Predict the outcome by enacting the action on the simulator itself
        # (not a clone), which advances the simulator by one step
        intended_outcome = self.simulator.outcome(intended_action)
        # Memorize the intended interaction
        self._intended_interaction = self._interactions[f"{intended_action}{intended_outcome}"]
        return intended_action

# Run the simulation

Nous utilisons une instance de l'environnement lui-même comme simulateur passé à l'agent.

In [1105]:
# Environment in which the agent acts: row 0, column 1, heading left
e = SmallLoop(position=[0, 1], direction=LEFT)

# Interactions known to the agent, listed as (action, outcome, valence)
interactions = [
    Interaction(action_code, outcome_code, valence)
    for action_code, outcome_code, valence in (
        (FORWARD, 0, 5),
        (FORWARD, 1, -10),
        (FEEL_FRONT, 0, -1),
        (FEEL_FRONT, 1, -1),
        (TURN_LEFT, 0, -3),
        (TURN_LEFT, 1, -3),
    )
]
# The agent's internal simulator starts in the same state as the environment
simulator = SmallLoop(position=[0, 1], direction=LEFT)
a = Agent(interactions, simulator)

# Initial state of the interaction loop
step, outcome = 0, 0

# Draw the initial state into a fresh output widget and show the widget
out = Output()
e.display()
display(out)

Output()

In [1106]:
# One interaction cycle: re-run this cell to advance by one step.
print(f"Step {step}")
step += 1
# The agent receives the outcome of the previous cycle and chooses its next action
action = a.action(outcome)
# Draw the environment before the chosen action is enacted
e.display()
# Enact the action in the environment; its outcome is passed to the agent on the next cycle
outcome = e.outcome(action)
# Cell output: the agent's table of candidate actions ranked by valence
a.action_df

Step 0
Action: 0, Prediction: 0, Outcome: 0, Prediction: True, Valence: 5)


Unnamed: 0,action,outcome,valence
0,1,1,-1
1,4,0,-3
2,0,1,-10


L'agent sélectionne toujours l'action `feel` car c'est celle qui produit l'interaction qui a la valence la moins basse (-1).

# Agent2

Implémentons l'Agent2, qui simule plusieurs coups à l'avance (deux dans cette implémentation).

In [1107]:
class Agent2(Agent):
    """Agent that simulates two actions ahead before choosing the first one."""

    def select_action(self):
        """Select the first action of the two-action sequence with the highest total valence.

        Every ordered pair (a1, a2) of known actions is simulated: a1 on a
        clone of the internal simulator, then a2 on a clone of the resulting
        state. The table of all pairs, sorted by descending total valence, is
        left in `self.action_df` with columns action, outcome, a2, o2, valence.

        :return: the first action of the pair on the first row of the table.
        """
        columns = ["action", "outcome", "a2", "o2", "valence"]
        rows = []
        # Simulate two steps ahead
        for a1 in self._actions:
            s1 = self.simulator.clone()
            o1 = s1.outcome(a1)
            v1 = self._interactions[f"{a1}{o1}"].valence
            for a2 in self._actions:
                # Clone again so that each second action starts from the same state
                s2 = s1.clone()
                o2 = s2.outcome(a2)
                v2 = self._interactions[f"{a2}{o2}"].valence
                rows.append((a1, o1, a2, o2, v1 + v2))

        # Build the table once instead of concatenating one row at a time
        self.action_df = pd.DataFrame(rows, columns=columns)
        # Sort by descending valence; the stable sort keeps enumeration order among ties
        self.action_df = self.action_df.sort_values(
            by="valence", ascending=False, kind="stable"
        ).reset_index(drop=True)
        # Return the action that yields the highest valence
        return self.action_df.loc[0, "action"]


## Testons l'Agent2 en lui passant le Small Loop comme simulateur

Le simulateur est initialisé avec une position différente de celle de l'environnement.
Le simulateur se synchronise rapidement avec l'environnement quand l'agent est bloqué par un mur. 

In [1284]:
# Environment in which the agent acts: row 0, column 1, heading left
e = SmallLoop(position=[0, 1], direction=LEFT)

# Interactions known to the agent, listed as (action, outcome, valence)
interactions = [
    Interaction(action_code, outcome_code, valence)
    for action_code, outcome_code, valence in (
        (FORWARD, 0, 5),
        (FORWARD, 1, -10),
        (FEEL_FRONT, 0, -1),
        (FEEL_FRONT, 1, -1),
        (TURN_LEFT, 0, -3),
        (TURN_LEFT, 1, -3),
    )
]
# The internal simulator starts one column away from the environment (column 2)
simulator = SmallLoop(position=[0, 2], direction=LEFT)
a = Agent2(interactions, simulator)

# Initial state of the interaction loop
step, outcome = 0, 0

# Draw the environment together with the agent's simulator, then show the widget
out = Output()
e.display(a.simulator)
display(out)

Output()

In [1286]:
# One interaction cycle: re-run this cell to advance by one step.
print(f"Step {step}")
step += 1
# The agent receives the outcome of the previous cycle and chooses its next action
action = a.action(outcome)
# Draw the environment and, on the second row, the agent's internal simulator
e.display(a.simulator)
# Reset the environment row of the display so that earlier marks do not persist
e.clear(True)
# Enact the action in the environment; its outcome is passed to the agent on the next cycle
outcome = e.outcome(action)
# Cell output: the table of simulated two-action sequences ranked by total valence
a.action_df

Step 1
Action: 0, Prediction: 0, Outcome: 1, Prediction: False, Valence: -10)


Unnamed: 0,action,outcome,a2,o2,valence
0,4,0,0,0,2
1,1,1,1,1,-2
2,1,1,4,0,-4
3,4,0,1,0,-4
4,4,0,4,0,-6
5,0,1,1,1,-11
6,1,1,0,1,-11
7,0,1,4,0,-13
8,0,1,0,1,-20
