[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PetiteIA/schema_mechanism/blob/master/notebooks/agent6.ipynb)

# THE AGENT WHO FOLLOWED ANCIENT CLUES

# Learning objectives

Upon completing this lab, you will be able to implement a developmental agent that learns sequences of interactions.

## Define the necessary classes

Let's improve the Interaction class a little bit.

In [652]:
class Interaction:
    """A primitive interaction: a tuple (action, outcome) associated with a valence."""

    def __init__(self, action, outcome, valence):
        """Store the action, the outcome and the valence of this interaction."""
        self._action = action
        self._outcome = outcome
        self._valence = valence

    def get_action(self):
        """Return the action"""
        return self._action

    def get_outcome(self):
        """Return the outcome"""
        return self._outcome

    def get_valence(self):
        """Return the valence"""
        return self._valence

    def key(self):
        """ The key to find this interaction in the dictionary is the string '<action><outcome>'. """
        return f"{self._action}{self._outcome}"

    def pre_key(self):
        """Return the same string as key()."""
        return self.key()

    def __str__(self):
        """ Print interaction in the form '<action><outcome>:<valence>' for debug."""
        return f"{self._action}{self._outcome}:{self._valence}"

    def __eq__(self, other):
        """ Interactions are equal if they have the same key (the valence is ignored) """
        if isinstance(other, self.__class__):
            return self.key() == other.key()
        else:
            return False

    def __hash__(self):
        """Hash on the key so that equal interactions hash alike.

        Defining __eq__ alone sets __hash__ to None, which makes instances
        unusable as dictionary keys or set members.
        """
        return hash(self.key())

In [653]:
class CompositeInteraction:
    """A composite interaction is a tuple (pre_interaction, post_interaction) and a weight"""

    def __init__(self, pre_interaction, post_interaction):
        """Create the composite interaction with an initial weight of 1."""
        self.pre_interaction = pre_interaction
        self.post_interaction = post_interaction
        self.weight = 1
        self.isActivated = False

    def get_action(self):
        """Return the action of the post interaction"""
        return self.post_interaction.get_action()

    def get_valence(self):
        """Return the valence of the pre_interaction plus the valence of the post_interaction"""
        return self.pre_interaction.get_valence() + self.post_interaction.get_valence()

    def reinforce(self):
        """Increment the composite interaction's weight"""
        self.weight += 1

    def key(self):
        """ The key to find this interaction in the dictionary is the string '(<pre_interaction key>,<post_interaction key>)'. """
        return f"({self.pre_interaction.key()},{self.post_interaction.key()})"

    def pre_key(self):
        """Return the pre_key of the pre_interaction"""
        return self.pre_interaction.pre_key()

    def __str__(self):
        """ Print the interaction in the Newick tree format (pre_interaction, post_interaction: weight) """
        return f"({self.pre_interaction}, {self.post_interaction}: {self.weight})"

    def __eq__(self, other):
        """ Interactions are equal if they have the same pre and post interactions (the weight is ignored) """
        if isinstance(other, self.__class__):
            return (self.pre_interaction == other.pre_interaction) and (self.post_interaction == other.post_interaction)
        else:
            return False

    def __hash__(self):
        """Hash on the key, which is built from the keys of the pre and post interactions.

        Defining __eq__ alone sets __hash__ to None, which makes instances
        unusable as dictionary keys or set members.
        """
        return hash(self.key())

## Define the Agent class

We will use a Pandas DataFrame to compute the selection of the next intended interaction and to predict its most likely outcome.

In [None]:
!pip install pandas

Let's implement a base Agent that has the functionalities of Agent5, implemented using pandas.

In [667]:
import pandas as pd

class Agent:
    """Agent that selects its next action from learned composite interactions.

    The selection is computed with pandas DataFrames. The DataFrame of the last
    cycle is kept in `selection_df` so that the notebook can display it.
    """

    def __init__(self, _interactions):
        """ Initialize our agent

        _interactions: the primitive interactions. The one whose key is "00"
        is used as the first intended interaction.
        """
        self._interactions = {}
        for interaction in _interactions:
            self._interactions[interaction.key()] = interaction
        self._composite_interactions = {}
        self._intended_interaction = self._interactions["00"]
        # Memory of the interactions enacted during the latest cycles
        self._last_interaction = None
        self._previous_interaction = None
        self._penultimate_interaction = None
        self._last_composite_interaction = None
        self._previous_composite_interaction = None
        # One default row per primitive interaction whose outcome is 0,
        # with null weight and proclivity
        defaults = [interaction for interaction in _interactions if interaction.get_outcome() == 0]
        self.primitive_df = pd.DataFrame({
            'post_action': [default.get_action() for default in defaults],
            'weight': [0 for _ in defaults],
            'proclivity': [0 for _ in defaults],
            'post_interaction': [default.key() for default in defaults],
        })
        # Kept as an attribute so that the notebook can display it
        self.selection_df = None

    def action(self, _outcome):
        """Implement the agent's policy

        Record the interaction enacted with _outcome, learn from it, then
        select the next intended interaction and return its action.
        """
        # Shift the memory of the previous cycles by one step
        (self._previous_composite_interaction,
         self._penultimate_interaction,
         self._previous_interaction) = (self._last_composite_interaction,
                                        self._previous_interaction,
                                        self._last_interaction)
        intended = self._intended_interaction
        enacted_key = f"{intended.get_action()}{_outcome}"
        self._last_interaction = self._interactions[enacted_key]
        predicted = intended.get_outcome()
        print(f"Action: {intended.get_action()}, Prediction: {predicted}, "
              f"Outcome: {_outcome}, Prediction_correct: {predicted == _outcome}, "
              f"Valence: {self._last_interaction.get_valence()}")

        # Learn from the interaction that was just enacted
        self.learn()

        # The activated composite interactions are those whose pre interaction is
        # the last enacted primitive or composite interaction
        activated = [composite for composite in self._composite_interactions.values()
                     if composite.pre_interaction == self._last_interaction
                     or composite.pre_interaction == self._last_composite_interaction]
        activated_df = pd.DataFrame({
            'composite': [composite.key() for composite in activated],
            'weight': [composite.weight for composite in activated],
            'post_valence': [composite.post_interaction.get_valence() for composite in activated],
            'post_action': [composite.post_interaction.get_action() for composite in activated],
            'post_interaction': [composite.post_interaction.pre_key() for composite in activated],
        })

        # Selection rows: the default primitive rows followed by the activated rows
        df = pd.concat([self.primitive_df, activated_df], ignore_index=True)

        # Proclivity of each row, then its sum per proposed action
        df['proclivity'] = df['weight'] * df['post_valence']
        proclivity_sums = df.groupby('post_action').agg({'proclivity': 'sum'}).reset_index()
        df = df.merge(proclivity_sums, on='post_action', suffixes=('', '_sum'))

        # For each action, the post interaction of the row with the highest weight
        # is taken as the interaction to intend
        heaviest_rows = df.groupby('post_action')['weight'].idxmax()
        likeliest = df.loc[heaviest_rows, ['post_action', 'post_interaction']].reset_index(drop=True)
        likeliest.columns = ['post_action', 'intended']
        df = df.merge(likeliest, on='post_action')

        # Intend the interaction of the first row that has the highest summed proclivity
        best_row = df['proclivity_sum'].idxmax()
        self._intended_interaction = self._interactions[df.loc[best_row, 'intended']]
        print("Intended", self._intended_interaction)

        # Keep a copy of the selection dataframe for printing
        self.selection_df = df.copy()

        return self._intended_interaction.get_action()

    def learn(self):
        """Record or reinforce the composite interaction (previous interaction, last interaction)"""
        if self._previous_interaction is None:
            # Nothing to chain yet
            return
        candidate = CompositeInteraction(self._previous_interaction, self._last_interaction)
        key = candidate.key()
        if key in self._composite_interactions:
            known = self._composite_interactions[key]
            known.reinforce()
            print(f"Reinforcing {known}")
            self._last_composite_interaction = known
        else:
            self._composite_interactions[key] = candidate
            print(f"Learning {candidate}")
            self._last_composite_interaction = candidate

# PRELIMINARY EXERCISE

Let's test this agent in Environment4

In [724]:
class Environment4:
    """Environment whose rule flips once its internal step counter reaches 10."""

    def __init__(self):
        """Start with the step counter at 0."""
        self.step = 0

    def outcome(self, _action):
        """Count one more step and return the outcome of _action.

        The counter is incremented before it is tested, so calls 1 to 9 use
        the first rule and every call from the 10th on uses the second rule.
        """
        self.step += 1
        if self.step >= 10:
            # Second rule: action 0 yields outcome 1, any other action yields 0
            return 1 if _action == 0 else 0
        # First rule: action 0 yields outcome 0, any other action yields 1
        return 0 if _action == 0 else 1

Initialize the simulation

In [725]:
# Instantiate a new agent.
# Each Interaction is (action, outcome, valence): for the three actions 0, 1 and 2,
# outcome 1 has valence +1 and outcome 0 has valence -1.
interactions = [
    Interaction(0,0,-1),
    Interaction(0,1,1),
    Interaction(1,0,-1),
    Interaction(1,1,1),
    Interaction(2,0,-1),
    Interaction(2,1,1)
]
a = Agent(interactions)
e = Environment4()

# Initialize the interaction loop: the step counter and the first outcome given to the agent
step = 0
outcome = 0

Run the simulation step by step to see the Selection DataFrame. Use `Ctrl+Enter` to run the cell below and stay on it.

In [744]:
# One cycle of the interaction loop: run this cell again to advance by one step
print(f"Step {step}")
step += 1
# The agent receives the outcome of the previous cycle and chooses the next action
action = a.action(outcome)
# The environment returns the outcome of this action
outcome = e.outcome(action)
# Display the selection dataframe stored by the agent during this cycle
a.selection_df[['composite', 'weight', 'post_valence', 'post_action', 'proclivity', 'proclivity_sum', 'intended']]

Step 18
Action: 0, Prediction: 1, Outcome: 1, Prediction_correct: True, Valence: 1
Reinforcing (01:1, 01:1: 7)
Intended 01:1


Unnamed: 0,composite,weight,post_valence,post_action,proclivity,proclivity_sum,intended
0,,0,,0,,7.0,1
1,,0,,1,,0.0,10
2,,0,,2,,0.0,20
3,"(01,01)",7,1.0,0,7.0,7.0,1


Observe the Selection DataFrame above as you run the agent step by step. 
Each activated composite interaction proposes its post_action with a proclivity equal to the composite interaction's weight multiplied by the post interaction's valence.

The proclivities are summed for each action. The action that has the highest `proclivity_sum` is selected.

* During the first 10 steps, the composite interaction (11,11) is progressively reinforced as the agent learns that, in the context of interaction 11, it can again enact interaction 11.
* After Step 10, the agent learns the composite interaction (01,01), which tells that, in the context of interaction 01, the agent can again enact 01.

Let's define Environment5 in which the agent must perform the same action twice in a row in order to get an outcome of 1.

In [651]:
class Environment6:
    """Environment whose outcome depends on the action taken two steps earlier."""

    def __init__(self):
        """Start with a history of two actions equal to 0."""
        self._previous_action = 0
        self._last_action = 0

    def outcome(self, _action):
        """Take the action and generate the next outcome

        Return 0 if _action equals the action taken two steps earlier,
        and 1 otherwise.
        """
        # Use the parameter rather than a global variable named `action`
        if self._previous_action == _action:
            # Same action as two steps earlier: outcome 0
            outcome = 0
        else:
            # Different action: outcome 1
            outcome = 1
        # Shift the history of actions by one step
        self._previous_action = self._last_action
        self._last_action = _action
        return outcome

In [745]:
class Environment7:
    """Environment that returns outcome 1 when an action is repeated exactly twice in a row."""

    def __init__(self):
        """Start with a history of two actions equal to 0."""
        self._previous_action = 0
        self._last_action = 0

    def outcome(self, _action):
        """Take the action and generate the next outcome

        Return 1 if _action equals the last action but differs from the one
        before it, and 0 otherwise.
        """
        # Use the parameter rather than a global variable named `action`
        if self._last_action == _action and self._previous_action != _action:
            # Same action as the last one, but not as the one before: outcome 1
            outcome = 1
        else:
            # Any other case: outcome 0
            outcome = 0
        # Shift the history of actions by one step
        self._previous_action = self._last_action
        self._last_action = _action
        return outcome

In [1003]:
class Environment5:
    """Environment that returns outcome 0 when the same action is taken three times in a row."""

    def __init__(self):
        """Start with a history of actions equal to 0."""
        self._penultimate_action = 0
        self._previous_action = 0
        self._last_action = 0

    def outcome(self, _action):
        """Take the action and generate the next outcome

        Return 0 if _action equals both the last action and the one before
        it, and 1 otherwise.
        """
        # Use the parameter rather than a global variable named `action`
        if _action == self._last_action and _action == self._previous_action:
            # Same action as the two preceding ones: outcome 0
            outcome = 0
        else:
            # Any other case: outcome 1
            outcome = 1
        # Shift the history of actions by one step
        self._penultimate_action = self._previous_action
        self._previous_action = self._last_action
        self._last_action = _action
        return outcome

In [1004]:
# Instantiate a new agent.
# Each Interaction is (action, outcome, valence): for the two actions 0 and 1,
# outcome 1 has valence +1 and outcome 0 has valence -1.
interactions = [
    Interaction(0,0,-1),
    Interaction(0,1,1),
    Interaction(1,0,-1),
    Interaction(1,1,1)
]
a = Agent(interactions)
e = Environment5()

# Initialize the interaction loop: the step counter and the first outcome given to the agent
step = 0
outcome = 0

In [1020]:
# One cycle of the interaction loop: run this cell again to advance by one step
print(f"Step {step}")
step += 1
# The agent receives the outcome of the previous cycle and chooses the next action
action = a.action(outcome)
# The environment returns the outcome of this action
outcome = e.outcome(action)
# Display the selection dataframe stored by the agent during this cycle
a.selection_df[['composite', 'weight', 'post_valence', 'post_action', 'proclivity', 'proclivity_sum', 'intended']]

Step 15
Action: 0, Prediction: 1, Outcome: 1, Prediction_correct: True, Valence: 1
Reinforcing (11:1, 01:1: 4)
Intended 01:1


Unnamed: 0,composite,weight,post_valence,post_action,proclivity,proclivity_sum,intended
0,,0,,0,,0.0,1
1,,0,,1,,0.0,10
2,"(01,01)",3,1.0,0,3.0,0.0,1
3,"(01,00)",3,-1.0,0,-3.0,0.0,1


Observe that this agent cannot get a positive valence each time.
To do so, it would need to select its action based on the two previous interactions.
We are going to design Agent6, which can do it.

# ASSIGNMENT

Implement Agent6 that learns a higher level of composite interactions as shown in Figure 1.

![Agent6](img/Figure_1_Agent6.svg)

Figure 1: Agent6 records and reinforces two levels of composite interactions:
* First-level composite interaction $c_{t-1} = (i_{t-2}, i_{t-1}: weight)$, 
* Second-level composite interaction $(c_{t-2}, i_{t-1}: weight)$.

The last enacted primitive interaction $i_{t-1}$ and the last enacted composite interaction $c_{t-1}$ activate previously-learned composite interactions that propose the action of their post interaction.

In [None]:
class Agent6(Agent):
    """Agent that records and reinforces two levels of composite interactions."""

    def learn(self):
        """Learn the first-level, then the second-level composite interaction.

        The first level (previous interaction, last interaction) is handled by
        Agent.learn(). The second level is the composite interaction
        (previous composite interaction, last interaction).
        """
        # First level: reuse the parent's learning instead of duplicating it
        super().learn()

        # Second level: needs a previous interaction and a previous composite interaction
        if self._previous_interaction is None or self._previous_composite_interaction is None:
            return
        composite_interaction_2 = CompositeInteraction(self._previous_composite_interaction, self._last_interaction)
        key = composite_interaction_2.key()
        known = self._composite_interactions.get(key)
        if known is None:
            # Record the new second-level composite interaction
            self._composite_interactions[key] = composite_interaction_2
            print(f"Learning {composite_interaction_2}")
        else:
            # Reinforce the one that is already known
            known.reinforce()
            print(f"Reinforcing {known}")

## Test your Agent6 in Environment5

In [None]:
# Instantiate a new agent.
# Each Interaction is (action, outcome, valence): for the two actions 0 and 1,
# outcome 1 has valence +1 and outcome 0 has valence -1.
interactions = [
    Interaction(0,0,-1),
    Interaction(0,1,1),
    Interaction(1,0,-1),
    Interaction(1,1,1),

]
a = Agent6(interactions)
e = Environment5()

# Initialize the interaction loop: the step counter and the first outcome given to the agent
step = 0
outcome = 0

In [None]:
# One cycle of the interaction loop: run this cell again to advance by one step
print(f"Step {step}")
step += 1
# The agent receives the outcome of the previous cycle and chooses the next action
action = a.action(outcome)
# The environment returns the outcome of this action
outcome = e.outcome(action)
# Display the selection dataframe stored by the agent during this cycle
a.selection_df[['composite', 'weight', 'post_valence', 'post_action', 'proclivity', 'proclivity_sum', 'intended']]

Observe that Agent6 obtains a positive valence after Step 29.

## Test your Agent6 in the turtle environment

In [1054]:
# @title Install the turtle environment
!pip3 install ColabTurtle
from ColabTurtle.Turtle import *



In [1055]:
# @title Initialize the turtle environment

BORDER_WIDTH = 20

class ColabTurtleEnvironment:
    """Turtle environment in which the outcome tells whether the turtle bumped into the border.

    Action 0 moves forward, action 1 turns left while advancing and action 2
    turns right while advancing.
    """

    def __init__(self):
        """ Creating the Turtle window """
        bgcolor("lightGray")
        penup()
        goto(window_width() / 2, window_height()/2)
        face(0)
        pendown()
        color("green")

    def outcome(self, action):
        """ Enacting an action and returning the outcome

        The action is enacted in up to 10 increments. As soon as an increment
        brings the turtle beyond the border, the turtle is put back on the
        border, the interaction ends and the outcome is 1. Otherwise the
        outcome is 0.
        """
        for _ in range(10):
            # Enact one increment of the action
            if action == 0:
                forward(10)
            elif action == 1 or action == 2:
                turn = left if action == 1 else right
                turn(4)
                forward(2)

            # Bring the turtle back inside the border on each side it has crossed
            bumped = False
            if xcor() < BORDER_WIDTH:
                goto(BORDER_WIDTH, ycor())
                bumped = True
            if xcor() > window_width() - BORDER_WIDTH:
                goto(window_width() - BORDER_WIDTH, ycor())
                bumped = True
            if ycor() < BORDER_WIDTH:
                goto(xcor(), BORDER_WIDTH)
                bumped = True
            if ycor() > window_height() - BORDER_WIDTH:
                goto(xcor(), window_height() -BORDER_WIDTH)
                bumped = True

            if not bumped:
                color("green")
                continue

            # The bump ends the interaction
            color("red")
            if action == 1:
                spin = left
            elif action == 2:
                spin = right
            else:
                spin = None
            if spin is not None:
                # Keep turning in the same direction, in 10 increments of 4
                for _ in range(10):
                    spin(4)
            return 1

        return 0

In [None]:
# @title Run the turtle environment
import pandas as pd
initializeTurtle()

# Parameterize the rendering
bgcolor("lightGray")
penup()
goto(window_width() / 2, window_height()/2)
face(0)
pendown()
color("green")
speed(10)

# Some valences to avoid bumping into walls.
# Each Interaction is (action, outcome, valence): action 0 has valence 3 with
# outcome 0 and -3 with outcome 1; action 1 has valence -1 and action 2 has
# valence -2, whatever the outcome.
interactions = [
    Interaction(0,0,3),
    Interaction(0,1,-3),
    Interaction(1,0,-1),
    Interaction(1,1,-1),
    Interaction(2,0,-2),
    Interaction(2,1,-2)
]

a = Agent6(interactions)
e = ColabTurtleEnvironment()

# Run 100 cycles of the interaction loop, starting with outcome 0
outcome = 0
for i in range(100):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Report 

Explain what you programmed and what results you observed. Export this document as PDF including your code, the traces you obtained, and your explanations below (no more than a few paragraphs):