[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PetiteIA/schema_mechanism/blob/master/notebooks/agent6.ipynb)

# THE AGENT WHO CHANGED HIS MIND

# Learning objectives

Upon completing this lab, you will be able to implement a developmental agent that reinforces simple behaviors.

## Define the necessary classes

Let's use the same interaction class as Agent4

In [216]:
class Interaction:
    """An interaction is a tuple (action, outcome) with a valence"""
    def __init__(self, action, outcome, valence):
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """ The key to find this interaction in the dictinary is the string '<action><outcome>'. """
        return f"{self.action}{self.outcome}"

    def __str__(self):
        """ Print interaction in the form '<action><outcome:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __eq__(self, other):
        """ Interactions are equal if they have the same key """
        return self.key() == other.key()

Let's use the same CompositeInteraction class as Agent5

In [217]:
class CompositeInteraction:
    """A composite interaction is a tuple (pre_interaction, post_interaction) and a weight"""
    def __init__(self, pre_interaction, post_interaction):
        self.pre_interaction = pre_interaction
        self.post_interaction = post_interaction
        self.weight = 1
        self.isActivated = False

    def get_valence(self):
        """Return the valence of the pre_interaction plus the valence of the post_interaction"""
        return self.pre_interaction.get_valence() + self.post_interaction.get_valence()

    def reinforce(self):
        """Increment the composite interaction's weight"""
        self.weight += 1

    def key(self):
        """ The key to find this interaction in the dictionary is the string '<pre_interaction><post_interaction>'. """
        return f"({self.pre_interaction.key()},{self.post_interaction.key()})"

    def __str__(self):
        """ Print the interaction in the Newick tree format (pre_interaction, post_interaction: valence) """
        return f"({self.pre_interaction}, {self.post_interaction}: {self.weight})"

    def __hash__(self):
        """ The hash is necessary to use interactions as keys in a dictionary """
        return self.key()

    def __eq__(self, other):
        """ Interactions are equal if they have the same pre and post interactions """
        if isinstance(other, self.__class__):
            return (self.pre_interaction == other.pre_interaction) and (self.post_interaction == other.post_interaction)
        else:
            return False

## Define the Agent class

Let's install the libraray Pandas 

Agent 6 uses it to compute the selection of the next intended interaction. 

In [218]:
!pip install pandas




[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Now we create the Agent6 class

In [219]:
import pandas as pd

class Agent6:
    def __init__(self, _interactions):
        """ Initialize our agent """
        self._interactions = {interaction.key(): interaction for interaction in _interactions}
        self._composite_interactions = {}
        self._intended_interaction = self._interactions["00"]
        self._last_interaction = None
        self._previous_interaction = None
        pd.set_option('display.max_columns', 8)
        default_interactions = [interaction for interaction in _interactions if interaction.outcome == 0]
        data = {'post_action': [i.action for i in default_interactions],
                'weight': [0] * len(default_interactions),
                'proclivity': [0] * len(default_interactions),
                'post_interaction': [i.key() for i in default_interactions]}
        self.default_df = pd.DataFrame(data)
        self.df = self.default_df.copy()

    def action(self, _outcome):
        """Implement the agent's policy"""
        # tracing the previous cycle
        self._previous_interaction = self._last_interaction
        self._last_interaction = self._interactions[f"{self._intended_interaction.action}{_outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, "
              f"Outcome: {_outcome}, Prediction_correct: {self._intended_interaction.outcome == _outcome}, "
              f"Valence: {self._last_interaction.valence}")

        # Recording previous composite interaction
        if self._previous_interaction is not None:
            composite_interaction = CompositeInteraction(self._previous_interaction, self._last_interaction)
            if composite_interaction.key() not in self._composite_interactions:
                self._composite_interactions[composite_interaction.key()] = composite_interaction
                print(f"Learning {composite_interaction}")
            else:
                self._composite_interactions[composite_interaction.key()].reinforce()
                print(f"Reinforcing {self._composite_interactions[composite_interaction.key()]}")

        activated_keys = \
            [ci.key() for ci in self._composite_interactions.values() if ci.pre_interaction == self._last_interaction]

        data = {'composite': activated_keys,
                'weight': [self._composite_interactions[k].weight for k in activated_keys],
                'post_valence': [self._composite_interactions[k].post_interaction.valence for k in activated_keys],
                'post_action': [self._composite_interactions[k].post_interaction.action for k in activated_keys],
                'post_interaction': [self._composite_interactions[k].post_interaction.key() for k in activated_keys]
                }
        df = pd.DataFrame(data)

        # Add the actions by default
        df = pd.concat([self.default_df, df], ignore_index=True)

        # Compute the proclivity for each action
        df['proclivity'] = df['weight'] * df['post_valence']
        grouped_df = df.groupby('post_action').agg({'proclivity': 'sum'}).reset_index()
        df = df.merge(grouped_df, on='post_action', suffixes=('', '_sum'))

        # Find the most probable outcome for each action
        max_weight_df = df.loc[df.groupby('post_action')['weight'].idxmax(), ['post_action', 'post_interaction']].reset_index(drop=True)
        max_weight_df.columns = ['post_action', 'intended']
        df = df.merge(max_weight_df, on='post_action')

        df = df.drop(['post_interaction'], axis=1)
        # df = df.rename(columns={'proclivity_sum': 'proclivity'})
        # print(df)
        # print(df[['composite', 'weight', 'post_valence', 'post_action', 'proclivity', 'intended']])

        # Find the first row that has the highest proclivity
        max_index = df['proclivity_sum'].idxmax()
        intended_interaction = df.loc[max_index, ['intended']].values[0]
        self._intended_interaction = self._interactions[intended_interaction]
        print("Intended", self._intended_interaction)

        self.df = df.copy()
        
        return self._intended_interaction.action


# PRELIMINARY EXERCISE

Let's test this agent in Environment3

In [220]:
class Environment3:
    """ Environment 3 yields outcome 1 only when the agent alternates actions 0 and 1 """
    def __init__(self):
        """ Initializing Environment3 """
        self.previous_action = 0

    def outcome(self, _action):
        if _action == self.previous_action:
            _outcome = 0
        else:
            _outcome = 1
        self.previous_action = _action
        return _outcome

Initialize the simulation

In [221]:
# Instanciate a new agent
interactions = [
    Interaction(0,0,-1),
    Interaction(0,1,1),
    Interaction(1,0,-1),
    Interaction(1,1,1),
    Interaction(2,0,-1),
    Interaction(2,1,1)
]
a = Agent6(interactions)
e = Environment3()

# Run the interaction loop
step = 0
outcome = 0

Run the simulation step by step to see the action selection table. Use `Ctrl+Enter` to run the cell and stay on the same cell.

In [222]:
print(f"Step {step}")
step += 1
action = a.action(outcome)
outcome = e.outcome(action)
a.df[['composite', 'weight', 'post_valence', 'post_action', 'proclivity', 'proclivity_sum', 'intended']]

Step 0
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1
Intended 00:-1


Unnamed: 0,composite,weight,post_valence,post_action,proclivity,proclivity_sum,intended
0,,0.0,,0.0,,0.0,0
1,,0.0,,1.0,,0.0,10
2,,0.0,,2.0,,0.0,20


Observe the interaction selecting table above. 

Each activated composite interaction proposes its post_action with proclity equals to the composite interaction's weight multiplied by the post interactions' valence. 

The proclivities are summed for each action. The action that has the hiest sum proclivity is selected.

# ASSIGNMENT

Implement Agent6 that learns higher levels of composite interactions.

![Agent5](img/Agent6.png)

Figure 1: The agent records and reinforce composite interactions as tuples $(i_{t-2}, i_{t-1}: weight)$. The last enacted interaction $i_{t-1}$ activates previously-learned composite interactions that propose the action of their post interaction.

## Test Agent6 in the turtle environment

In [223]:
# @title Install the turtle environment
!pip3 install ColabTurtle
from ColabTurtle.Turtle import *




[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [224]:
# @title Initialize the turtle environment

BORDER_WIDTH = 20

class ColabTurtleEnvironment:

    def __init__(self):
        """ Creating the Turtle window """
        bgcolor("lightGray")
        penup()
        goto(window_width() / 2, window_height()/2)
        face(0)
        pendown()
        color("green")

    def outcome(self, action):
        """ Enacting an action and returning the outcome """
        _outcome = 0
        for i in range(10):
            # _outcome = 0
            if action == 0:
                # move forward
                forward(10)
            elif action == 1:
                # rotate left
                left(4)
                forward(2)
            elif action == 2:
                # rotate right
                right(4)
                forward(2)

            # Bump on screen edge and return outcome 1
            if xcor() < BORDER_WIDTH:
                goto(BORDER_WIDTH, ycor())
                _outcome = 1
            if xcor() > window_width() - BORDER_WIDTH:
                goto(window_width() - BORDER_WIDTH, ycor())
                _outcome = 1
            if ycor() < BORDER_WIDTH:
                goto(xcor(), BORDER_WIDTH)
                _outcome = 1
            if ycor() > window_height() - BORDER_WIDTH:
                goto(xcor(), window_height() -BORDER_WIDTH)
                _outcome = 1

            # Change color
            if _outcome == 0:
                color("green")
            else:
                # Finit l'interaction
                color("red")
                # if action == 0:
                #     break
                if action == 1:
                    for j in range(10):
                        left(4)
                elif action == 2:
                    for j in range(10):
                        right(4)
                break

        return _outcome

In [227]:
# @title Run the turtle environment
initializeTurtle()

# Parameterize the rendering
bgcolor("lightGray")
penup()
goto(window_width() / 2, window_height()/2)
face(0)
pendown()
color("green")
speed(10)

# Some valences to avoid bumping into walls
interactions = [
    Interaction(0,0,3),
    Interaction(0,1,-3),
    Interaction(1,0,-1),
    Interaction(1,1,-1),
    Interaction(2,0,-2),
    Interaction(2,1,-2)
]

a = Agent6(interactions)
e = ColabTurtleEnvironment()

outcome = 0
for i in range(50):
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: 3
Intended 00:3
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: 3
Learning (00:3, 00:3: 1)
Intended 00:3
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: 3
Reinforcing (00:3, 00:3: 2)
Intended 00:3
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: 3
Reinforcing (00:3, 00:3: 3)
Intended 00:3
Action: 0, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: -3
Learning (00:3, 01:-3: 1)
Intended 00:3
Action: 0, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: -3
Learning (01:-3, 01:-3: 1)
Intended 10:-1
Action: 1, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: -1
Learning (01:-3, 11:-1: 1)
Intended 00:3
Action: 0, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: -3
Learning (11:-1, 01:-3: 1)
Intended 20:-2
Action: 2, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: -2
Learning (01:

## Report 

Explain what you programmed and what results you observed. Export this document as PDF including your code, the traces you obtained, and your explanations below (no more than a few paragraphs):