# AGENT2: L'agent qui simulait longtemps (en construction)

Ce notebook présente un agent qui simule plusieurs cycles à l'avance.

# L'environnement Small Loop

Ajoutons la méthode `clone()` à l'environnement pour permettre d'effectuer plusieurs simulations à partir du même état.

In [165]:
# Directory into which SmallLoop.save() writes its PNG frames.
save_dir = "sav"

# Action codes, numbered 0..5 in the order listed. The code is also the first
# character of an interaction key (e.g. "40" = TURN_LEFT with outcome 0).
(
    FORWARD,     # 0
    FEEL_FRONT,  # 1
    FEEL_LEFT,   # 2 - Not used
    FEEL_RIGHT,  # 3 - Not used
    TURN_LEFT,   # 4 - Turn 180°
    TURN_RIGHT,  # 5 - Not used
) = range(6)

In [166]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
from ipywidgets import Button, HBox,VBox, Output
from IPython.display import display

# Heading codes. Each one is also the row index of the matching displacement
# in SmallLoop.directions and the key of the matching marker in marker_map.
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3

# Cell codes. EMPTY and WALL describe the environment; the other three are
# transient marks painted on the display grid by SmallLoop.outcome().
EMPTY = 0
WALL = 1
FEELING_EMPTY = 2
FEELING_WALL = 3
BUMPING = 4

# One colour per display code. Entries 0-4 colour the environment row (cell
# codes as-is); entries 5-9 colour the simulator row, whose cell codes are
# shifted by 5 in SmallLoop.display().
# A "hidden environment" variant that paints EMPTY and WALL with the same grey
# used to precede this list; it only covered codes 0-4 and was overwritten
# before being used:
#   "#b0b0b0", "#b0b0b0", "#ffffff", "#535865", "#F93943"
colors = [
    "#D6D6D6", '#5C946E', '#FAE2DB', '#535865', "#F93943",  # Environment row
    "#eeeeee", "#85A48F", '#FAE2DB', '#535865', "#F93943",  # Simulator row
]
agent_color = "#1976D2"

class SmallLoop():
    """One-row corridor (wall, three empty cells, wall) holding an agent.

    The same class is used both as the real environment and as the agent's
    internal simulator. ``display_grid`` has two rows: row 0 shows this
    instance's cells plus the transient feel/bump marks, and row 1 is filled
    by ``display()`` with the row of the simulator passed to it.
    """

    def __init__(self, position, direction):
        """Place the agent at ``position`` facing ``direction``.

        :param position: ``[row, column]`` of the agent; it is copied, so the
            caller's sequence or array is never modified.
        :param direction: heading code (LEFT, DOWN, RIGHT or UP).
        """
        self.environment_grid = np.array([[WALL, EMPTY, EMPTY, EMPTY, WALL]])
        self.display_grid = np.zeros((2, self.environment_grid.shape[1]), dtype=int)
        self.display_grid[0, :] = self.environment_grid[0, :]
        self.position = np.array(position)
        self.direction = direction
        self.cmap = ListedColormap(colors)
        # One colour bin per integer code, centred on the integer
        self.norm = BoundaryNorm(np.arange(-0.5, len(colors) + 0.5, 1.0), self.cmap.N)
        self.marker_size = 400
        self.marker_map = {LEFT: '<', DOWN: 'v', RIGHT: '>', UP: '^'}
        self.marker_color = agent_color
        # (row, column) displacement for each heading code
        self.directions = np.array([
            [0, -1],  # Left
            [1, 0],   # Down
            [0, 1],   # Right
            [-1, 0]   # Up
            ])

    def _is_inside(self, cell):
        """Tell whether ``cell`` (row, column) lies inside the environment grid."""
        n_rows, n_cols = self.environment_grid.shape
        return 0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols

    def _probe(self, cell, free_mark, blocked_mark):
        """Return 0 if ``cell`` is EMPTY and 1 otherwise, painting the matching mark.

        A cell outside the grid counts as blocked and is not painted: indexing
        it would either raise IndexError or, for a negative index, silently
        wrap around to the opposite edge. ``free_mark`` may be None to leave
        an empty cell unpainted.
        """
        inside = self._is_inside(cell)
        if inside and self.environment_grid[tuple(cell)] == EMPTY:
            if free_mark is not None:
                self.display_grid[tuple(cell)] = free_mark
            return 0
        if inside:
            self.display_grid[tuple(cell)] = blocked_mark
        return 1

    def _feel(self, turn):
        """Probe the neighbour found ``turn`` steps further along the heading codes.

        0 is the cell ahead; +1 and -1 are the offsets used for FEEL_LEFT and
        FEEL_RIGHT respectively.
        """
        target = self.position + self.directions[(self.direction + turn) % 4]
        return self._probe(target, FEELING_EMPTY, FEELING_WALL)

    def outcome(self, action):
        """Apply ``action`` to the environment and return its outcome.

        :param action: one of the action codes (FORWARD, FEEL_*, TURN_*).
        :return: 1 when the agent bumps into, or feels, a non-empty cell
            (cells outside the grid included); 0 otherwise. Turning and
            unknown actions always return 0.
        """
        result = 0

        if action == FORWARD:
            target_position = self.position + self.directions[self.direction]
            result = self._probe(target_position, None, BUMPING)
            if result == 0:
                self.position[:] = target_position

        elif action == TURN_RIGHT:
            self.direction = {LEFT: UP, DOWN: LEFT, RIGHT: DOWN, UP: RIGHT}[self.direction]

        elif action == TURN_LEFT:
            # In this corridor TURN_LEFT is a 180° turn
            self.direction = {LEFT: RIGHT, DOWN: UP, RIGHT: LEFT, UP: DOWN}[self.direction]

        elif action == FEEL_FRONT:
            result = self._feel(0)

        elif action == FEEL_LEFT:
            result = self._feel(1)

        elif action == FEEL_RIGHT:
            result = self._feel(-1)

        return result

    def display(self, simulator=None):
        """Display the grid in the notebook.

        Reads two notebook-level globals: ``out``, the Output widget that
        receives the figure, and ``step``, the counter printed on the figure.

        :param simulator: optional SmallLoop whose row and agent are drawn on
            the second line of the display.
        """
        out.clear_output(wait=True)
        with out:
            fig, ax = plt.subplots()
            if simulator is not None:
                plt.scatter(simulator.position[1], 1, s=self.marker_size, marker=self.marker_map[simulator.direction], c="#aaaaaa")
                # Shift the simulator's codes by 5 to use the second half of the palette
                self.display_grid[1, :] = simulator.display_grid[0, :] + 5
            ax.imshow(self.display_grid, cmap=self.cmap, norm=self.norm)
            plt.scatter(self.position[1], self.position[0], s=self.marker_size, marker=self.marker_map[self.direction], c=self.marker_color)
            ax.text(4.5, 0, f"{step:>3}", fontsize=12, color='White')
            plt.show()

    def save(self, step):
        """Save the display as ``<save_dir>/<step on 4 digits>.png``.

        :param step: step counter, printed on the image and used as file name.
        """
        fig, ax = plt.subplots()
        ax.set_xticks([])
        ax.set_yticks([])
        ax.axis('off')
        ax.imshow(self.display_grid, cmap=self.cmap, norm=self.norm)
        plt.scatter(self.position[1], self.position[0], s=self.marker_size, marker=self.marker_map[self.direction], c=self.marker_color)
        ax.text(4.5, 0, f"{step:>4}", fontsize=12, color='White')
        plt.savefig(f"{save_dir}/{step:04}.png", bbox_inches='tight', pad_inches=0, transparent=True)
        plt.close(fig)

    def clear(self, clear):
        """Erase the feel/bump marks from the first display row when ``clear`` is true."""
        if clear:
            self.display_grid[0, :] = self.environment_grid[0, :]

    def clone(self):
        """Return a new SmallLoop with the same agent position and direction.

        The constructor copies the position, so acting on the clone never
        moves this instance. The clone starts with a freshly built display
        grid: marks painted on this instance are not carried over.
        """
        return SmallLoop(self.position, self.direction)
        

# L'agent

In [167]:
class Interaction:
    """An interaction is a tuple (action, outcome) with a valence"""
    def __init__(self, action, outcome, valence):
        """Store the action code, the outcome code and the valence of the interaction."""
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """ The key to find this interaction in the dictionary is the string '<action><outcome>'. """
        return f"{self.action}{self.outcome}"

    def __str__(self):
        """ Print interaction in the form '<action><outcome>:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __eq__(self, other):
        """ Interactions are equal if they have the same key (the valence is ignored). """
        if not isinstance(other, Interaction):
            # Let Python try the reflected comparison instead of failing on other.key()
            return NotImplemented
        return self.key() == other.key()

    def __hash__(self):
        """ Hash on the key so that equal interactions hash alike.

        Defining __eq__ alone makes a class unhashable. The hash follows the
        current action and outcome, so do not modify them while the
        interaction is stored in a set or used as a dictionary key.
        """
        return hash(self.key())

L'agent est instancié avec un simulateur interne qui lui est passé en argument.

In [168]:
import pandas as pd

class Agent:
    """Agent that tries each action in a clone of its simulator and picks the best valence."""
    def __init__(self, interactions, simulator):
        """ Initialize the dictionary of interactions

        :param interactions: iterable of objects exposing ``key()``, ``action``,
            ``outcome`` and ``valence``; it must contain the interaction of key "00".
        :param simulator: internal model exposing ``clone()`` and ``outcome(action)``.
        """
        # Materialize once: the interactions are traversed several times below
        interactions = list(interactions)
        self._interactions = {interaction.key(): interaction for interaction in interactions}
        self._intended_interaction = self._interactions["00"]
        # One candidate action per interaction declared with outcome 0
        self._actions = [i.action for i in interactions if i.outcome == 0]
        self.action_df = pd.DataFrame({"action": self._actions})
        self._simulator = simulator

    def select_action(self):
        """Select the action that yields the highest valence

        Each candidate action is tried in a fresh clone of the simulator, so
        the simulator itself is left untouched. ``action_df`` is left sorted
        by descending valence, with columns action, outcome and valence.

        :return: the action code found on the first row of ``action_df``.
        """
        # Roll the actions to try different actions when all outcome are equal
        self.action_df = pd.concat([self.action_df.tail(1), self.action_df.head(len(self.action_df) - 1)], ignore_index=True)
        actions = self.action_df["action"].tolist()
        # Try every action in a clone of the simulator
        outcomes = [self._simulator.clone().outcome(action) for action in actions]
        self.action_df["outcome"] = outcomes
        # Record the expected valence for each resulting interaction
        self.action_df["valence"] = [
            self._interactions[f"{action}{outcome}"].valence
            for action, outcome in zip(actions, outcomes)
        ]
        # Sort by descending valence. The sort must be stable, otherwise the
        # roll above would not decide which of several tied actions comes first.
        self.action_df = self.action_df.sort_values(by=['valence'], ascending=[False], kind='stable').reset_index(drop=True)
        # Return the action that yields the highest valence
        return self.action_df.loc[0, "action"]

    def action(self, _outcome):
        """ Trace the previous cycle, then choose the next action

        :param _outcome: outcome obtained for the previously returned action.
        :return: the action selected for the next cycle.
        """
        # Tracing the previous cycle
        previous_interaction = self._interactions[f"{self._intended_interaction.action}{_outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, Outcome: {_outcome}, " 
              f"Prediction: {self._intended_interaction.outcome == _outcome}, Valence: {previous_interaction.valence})")

        # Computing the next interaction to try to enact
        # Select the next action
        intended_action = self.select_action()

        # Predict the outcome by enacting the action in the simulator itself
        # (not in a clone), which also moves the simulator to its next state
        intended_outcome = self._simulator.outcome(intended_action)
        # Memorize the intended interaction
        self._intended_interaction = self._interactions[f"{intended_action}{intended_outcome}"]
        return intended_action

# Run the simulation

Nous utilisons une instance de l'environnement lui-même comme simulateur passé à l'agent.

In [169]:
# Instantiate the small loop environment: agent in cell [0, 1], heading code 0 (LEFT)
e = SmallLoop([0, 1], 0)

# Instantiate the agent
# Each interaction is (action, outcome, valence): moving forward is rewarded (+5),
# bumping is heavily penalised (-10), feeling costs 1 and turning costs 3.
interactions = [
    Interaction(FORWARD, 0, 5),
    Interaction(FORWARD, 1, -10),
    Interaction(FEEL_FRONT, 0, -1),
    Interaction(FEEL_FRONT, 1, -1),
    Interaction(TURN_LEFT, 0, -3),
    Interaction(TURN_LEFT, 1, -3)
]
# The simulator is a second SmallLoop created in the same state as the environment
simulator = SmallLoop([0, 1], 0)
a = Agent(interactions, simulator)

# Initialise the interaction loop
# `step` is also read as a global by SmallLoop.display()
step = 0
outcome = 0

# Display
# `out` must exist before e.display() is called: SmallLoop.display() draws into it
out = Output()
e.display()
display(out)

Output()

In [170]:
# One interaction cycle; re-run this cell to advance step by step
print(f"Step {step}")
step += 1
# Give the agent the previous outcome and get the action it selects next
action = a.action(outcome)
# Draw the environment as it stands before the selected action is enacted
e.display()
# Enact the action in the real environment and keep the outcome for the next cycle
outcome = e.outcome(action)
# Show the agent's table of candidate actions as the cell output
a.action_df

Step 0
Action: 0, Prediction: 0, Outcome: 0, Prediction: True, Valence: 5)


Unnamed: 0,action,outcome,valence
0,1,1,-1
1,4,0,-3
2,0,1,-10


L'agent sélectionne toujours l'action `feel` car c'est celle qui produit l'interaction dont la valence est la plus élevée (-1) : face au mur, avancer donnerait -10 et tourner donnerait -3.

# Agent2

Implémentons l'Agent2 qui simule plusieurs coups à l'avance.

On passe l'environnement à l'agent pour pouvoir visualiser la simulation.

In [194]:
import itertools

class Agent2(Agent):
    """Agent that simulates every pair of actions before choosing the first one.

    The look-ahead is computed incrementally: each call to ``simulate()``
    enacts one action in a clone of the simulator so that the caller can
    display the intermediate states. A cycle is: call ``simulate()`` until it
    returns False, then ``action()``, then ``update(outcome)``.
    """
    def __init__(self, interactions, simulator):
        """Build the table of all (action1, action2) pairs.

        :param interactions: interactions known to the agent (see Agent).
        :param simulator: internal model exposing ``clone()`` and ``outcome(action)``.
        """
        super().__init__(interactions, simulator)
        self.simulation = simulator  # Used to display the current state of the simulation
        pairs = list(itertools.product(self._actions, repeat=2))
        # Template table: one row per ordered pair of actions, results zeroed
        self.default_df = pd.DataFrame(pairs, columns=['action1', 'action2'])
        self.default_df['outcome1'] = 0
        self.default_df['step'] = 0      # 0: action1 still to simulate, 1: action1 done
        self.default_df['outcome2'] = 0
        self.default_df['valence'] = 0   # Sum of the valences of both simulated interactions
        self.action_df = self.default_df.copy()
        self.selection_df = None         # action_df sorted by valence, set by action()
        self.simulation_step = 0         # Index of the row being simulated
        self._s1 = None                  # Simulator clone after action1 of the current row
        self._valence1 = 0               # Valence of the first interaction of the current row

    def simulate(self):
        """Compute the next row in action_df. Return True during the simulation and False when all the rows are computed

        Every row takes two calls: the first one simulates action1, the second
        one simulates action2 from the state reached after action1.
        ``self.simulation`` is set to the clone that was just updated.
        """
        # Nothing left to simulate (all rows done, or no action at all):
        # report completion rather than failing on a missing row label
        if self.simulation_step >= len(self.action_df):
            return False
        # Simulate two steps ahead
        a1, phase, a2 = self.action_df.loc[self.simulation_step, ['action1', 'step', 'action2']]
        if phase == 0:
            self._s1 = self._simulator.clone()
            o1 = self._s1.outcome(a1)
            # outcome1 is written on every row that starts with the same action
            self.action_df.loc[self.action_df['action1'] == a1, 'outcome1'] = o1
            self.simulation = self._s1
            self.action_df.loc[self.simulation_step, 'step'] = 1
            self._valence1 = self._interactions[f"{a1}{o1}"].valence
            return True
        else:
            # Clone again so that _s1 stays in the state reached after action1
            s2 = self._s1.clone()
            o2 = s2.outcome(a2)
            valence = self._valence1 + self._interactions[f"{a2}{o2}"].valence
            self.action_df.loc[(self.action_df['action1'] == a1) & (self.action_df['action2'] == a2), ['outcome2', 'valence']] = [o2, valence]
            self.simulation = s2
            self.simulation_step += 1
            return self.simulation_step < len(self.action_df)

    def update(self, outcome):
        """ Trace the previous cycle and reset the table for the next simulation

        :param outcome: outcome obtained for the action returned by ``action()``.
        """
        previous_interaction = self._interactions[f"{self._intended_interaction.action}{outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, Outcome: {outcome}, " 
              f"Prediction: {self._intended_interaction.outcome == outcome}, Valence: {previous_interaction.valence})")
        # Prepare the next simulation
        self.simulation_step = 0
        self.action_df = self.default_df.copy()

    def action(self):
        """ Select the next interaction to try to enact

        Unlike ``Agent.action``, this method takes no outcome: the outcome is
        reported afterwards through ``update()``.

        :return: the first action of the pair with the highest total valence.
        """
        # Sort by descending valence; the stable sort keeps tied pairs in table order
        self.selection_df = self.action_df.sort_values(by=['valence'], ascending=[False], kind='stable').reset_index(drop=True)
        # Return the action that yields the highest valence
        intended_action, intended_outcome = self.selection_df.loc[0, ['action1', 'outcome1']]
        self._intended_interaction = self._interactions[f"{intended_action}{intended_outcome}"]
        # Update the simulator by enacting the selected action in it
        self._simulator.outcome(intended_action)
        self.simulation = self._simulator
        return self._intended_interaction.action


## Testons l'Agent2 en lui passant le Small Loop comme simulateur

Le simulateur est initialisé avec une position différente de celle de l'environnement.
Le simulateur se synchronise rapidement avec l'environnement quand l'agent est bloqué par un mur.

La ligne du haut de l'affichage représente l'environnement. La ligne du bas représente la simulation que l'agent fait avant chaque décision.

In [195]:
# Instantiate the small loop environment: agent in cell [0, 1], heading code 0 (LEFT)
e = SmallLoop([0, 1], 0)

# Instantiate the agent
# Each interaction is (action, outcome, valence): moving forward is rewarded (+5),
# bumping is heavily penalised (-10), feeling costs 1 and turning costs 3.
interactions = [
    Interaction(FORWARD, 0, 5),
    Interaction(FORWARD, 1, -10),
    Interaction(FEEL_FRONT, 0, -1),
    Interaction(FEEL_FRONT, 1, -1),
    Interaction(TURN_LEFT, 0, -3),
    Interaction(TURN_LEFT, 1, -3)
]
# The simulator starts one cell away from the environment's agent ([0, 2] vs [0, 1])
simulator = SmallLoop([0, 2], 0)
agent = Agent2(interactions, simulator)

# Initialise the interaction loop
# `step` is also read as a global by SmallLoop.display()
step = 0
outcome = 0

# Display
# `out` must exist before e.display() is called: SmallLoop.display() draws into it
out = Output()
e.display(agent.simulation)
display(out)

Output()

In [200]:
# One interaction cycle; re-run this cell to advance step by step
step += 1
print(f"Step {step}")
# Run the whole look-ahead, redrawing after every simulated action
while agent.simulate():
    e.display(agent.simulation)
# Pick the best first action and enact it in the real environment
action = agent.action()
outcome = e.outcome(action)
e.display(agent.simulation)
# Erase the feel/bump marks from the environment row
e.clear(True)
# Report the actual outcome and reset the agent's table for the next cycle
agent.update(outcome)
# Show the sorted table used for the selection as the cell output
agent.selection_df

Step 5
Action: 4, Prediction: 0, Outcome: 0, Prediction: True, Valence: -3)


Unnamed: 0,action1,action2,outcome1,step,outcome2,valence
0,4,0,0,1,0,2
1,1,1,1,1,1,-2
2,1,4,1,1,0,-4
3,4,1,0,1,0,-4
4,4,4,0,1,0,-6
5,1,0,1,1,1,-11
6,0,1,1,1,1,-11
7,0,4,1,1,0,-13
8,0,0,1,1,1,-20
