[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PetiteIA/schema_mechanism/blob/master/notebooks/agent5.ipynb)

# THE AGENT WHO CHANGED HIS MIND

# Learning objectives

Upon completing this lab, you will be able to implement a developmental agent that reinforces simple behaviors.

## Define the Interaction class

Let's use the same interaction class as Agent4

In [68]:
class Interaction:
    """A primitive interaction: a tuple (action, outcome) with a valence.

    Two interactions are equal when they have the same key, i.e. the same action
    and the same outcome. The valence does not take part in the comparison.
    """
    def __init__(self, action, outcome, valence):
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """ The key to find this interaction in the dictionary is the string '<action><outcome>'. """
        return f"{self.action}{self.outcome}"

    def __str__(self):
        """ Print interaction in the form '<action><outcome>:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __eq__(self, other):
        """ Interactions are equal if they have the same key """
        if not isinstance(other, Interaction):
            # Comparing with a foreign object must not raise AttributeError on other.key();
            # NotImplemented lets Python fall back to its default (unequal) result.
            return NotImplemented
        return self.key() == other.key()

    def __hash__(self):
        """ Hash on the key: defining __eq__ alone would make interactions unhashable """
        return hash(self.key())

## Define the Agent class

The agent is initialized with the list of interactions 

On a new step _t+1_:
* The interaction enacted on step _t_ is memorized in `self._last_interaction`
* The interaction enacted on step _t-1_ is memorized in `self._previous_interaction`
* The intended interaction `(selected action, predicted outcome)` is memorized in `self._intended_interaction`

In [69]:
class Agent:
    """Base agent: records the enacted interactions and traces each cycle.

    Attributes:
        _interactions: dictionary of the known interactions indexed by their key()
        _intended_interaction: the interaction (selected action, predicted outcome) the agent tries to enact
        _last_interaction: the interaction enacted on the last step, None before the first step
        _previous_interaction: the interaction enacted one step earlier, None until two steps are done
    """
    def __init__(self, _interactions):
        """ Initialize the dictionary of interactions.

        _interactions is an iterable of interactions; each must provide key(), action,
        outcome and valence. An interaction with key "00" must be present because it is
        used as the first intended interaction (KeyError otherwise).
        """
        self._interactions = {interaction.key(): interaction for interaction in _interactions}
        self._intended_interaction = self._interactions["00"]
        self._last_interaction = None
        self._previous_interaction = None

    def action(self, _outcome):
        """ Record the outcome of the previous action and return the next action.

        _outcome is the outcome obtained for the previously intended action.
        Raises KeyError if no interaction is known for (intended action, _outcome).
        Returns the next intended action (always 0 until the TODOs below are implemented).
        """
        # Tracing the previous cycle
        self._previous_interaction = self._last_interaction
        self._last_interaction = self._interactions[f"{self._intended_interaction.action}{_outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, "
              f"Outcome: {_outcome}, Prediction_correct: {self._intended_interaction.outcome == _outcome}, "
              f"Valence: {self._last_interaction.valence})")

        # Computing the next interaction to try to enact
        # TODO: Implement the agent's decision mechanism
        intended_action = 0
        # TODO: Implement the agent's prediction mechanism
        intended_outcome = 0
        # Memorize the intended interaction. It must go in _intended_interaction:
        # _last_interaction holds the interaction that was actually enacted.
        self._intended_interaction = self._interactions[f"{intended_action}{intended_outcome}"]
        return intended_action


## Environment1 class

In [70]:
class Environment1:
    """ Environment 1 mirrors the action: action 0 gives outcome 0, any other action gives outcome 1 """
    def outcome(self, _action):
        """ Return the outcome obtained when enacting _action """
        return 0 if _action == 0 else 1

## Environment2 class

In [71]:
class Environment2:
    """ Environment 2 inverts the action: action 0 gives outcome 1, any other action gives outcome 0 """
    def outcome(self, _action):
        """ Return the outcome obtained when enacting _action """
        return 1 if _action == 0 else 0

## Environment3 class

Environment 3 yields outcome 1 only when the agent alternates actions 0 and 1

In [72]:
class Environment3:
    """ Environment 3 rewards change: the outcome is 1 when the action differs from the previous one, else 0 """
    def __init__(self):
        """ Start as if action 0 had just been enacted """
        self.previous_action = 0

    def outcome(self, _action):
        """ Return 0 if _action repeats the previous action and 1 otherwise, then remember _action """
        repeated = _action == self.previous_action
        self.previous_action = _action
        return 0 if repeated else 1

## Environment4 class

Environment4 behaves like Environment1 during the first 10 cycles and then like Environment 2

In [74]:
class Environment4:
    """ Environment 4 behaves like Environment1 at first and then switches to Environment2.

    The calls to outcome() are counted from 1. Calls numbered below switch_step behave
    like Environment1 (action 0 yields 0, other actions yield 1); from call number
    switch_step onward the environment behaves like Environment2 (action 0 yields 1,
    other actions yield 0). With the default switch_step of 10, the first 9 calls
    behave like Environment1.
    """
    def __init__(self, switch_step=10):
        """ Initializing Environment4; switch_step is the first step that behaves like Environment2 """
        self.step = 0
        self.switch_step = switch_step

    def outcome(self, _action):
        """Take the action and generate the next outcome """
        self.step += 1
        if self.step < self.switch_step:
            # Behave like Environment1 before the switch
            return 0 if _action == 0 else 1
        # Behave like Environment2 from the switch onward
        return 1 if _action == 0 else 0

## Initialize the interactions 

In [75]:
# Primitive interactions given as Interaction(action, outcome, valence).
# For each of the three actions (0, 1, 2), outcome 0 has valence -1 and outcome 1 has valence 1.
interactions = [
    Interaction(0,0,-1),
    Interaction(0,1,1),
    Interaction(1,0,-1),
    Interaction(1,1,1),
    Interaction(2,0,-1),
    Interaction(2,1,1)
]

Interactions are initialized with their action, their outcome, and their valence:

|| outcome 0 | outcome 1|
|---|---|---|
| action 0| -1 | 1 |
| action 1 | -1 | 1 |
| action 2 | -1 | 1 |

## Instantiate the agent

In [76]:
# Create the base agent from the list of interactions defined above
a = Agent(interactions)

## Instantiate the environment 

In [77]:
# Create the environment whose behavior changes during the run
e = Environment4()

## Test run the simulation

In [78]:
# Seed outcome handed to the first call of a.action(); it is not produced by the environment
outcome = 0
for i in range(30):
    # The agent receives the previous outcome and returns its next action
    action = a.action(outcome)
    # The environment turns that action into the next outcome
    outcome = e.outcome(action)

Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 0, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)
Action: 0, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)
Action: 0, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)

# PRELIMINARY EXERCISE

Run your Agent4 in Environment4.

Depending on how you designed it, it may not be able to re-adapt after Environment4 changes on step 10. 
Let's design Agent5 that can readapt!

# ASSIGNMENT

Implement Agent5 so that it obtains positive valences in each of Environments 1, 2, and 3, and so that it can adapt to Environment 4 and then readapt after 10 more steps.

Now we call *primitive interaction* a tuple `(action, outcome)`.
We call *composite interaction* a sequence of two primitive interactions: `composite_interaction = (pre_interaction, post_interaction)`.

Composite interactions are used to store the sequences of the last two enacted interactions.
They have a weight that counts the number of times that this sequence has been enacted. 

When computing the prediction, the agent uses the weight of the activated composite interactions to predict the most likely outcome for each possible action in a given context as shown in Figure 1.

![Agent5](img/Figure_1_Agent5.svg)

Figure 1: Agent5 records and reinforces composite interactions as tuples $(i_{t-2}, i_{t-1}: weight)$. The last enacted interaction $i_{t-1}$ activates previously-learned composite interactions that propose the action of their post interaction.

The proclivity $proclivity_a$ is computed for each action $a$ according to the formula: 

$\displaystyle proclivity_a = \sum_{c \in A} w_c \cdot v_{post(c)}$

in which $A$ is the set of activated composite interactions that propose action $a$, $w_c$ is the weight of composite interaction $c$, and $v_{post(c)}$ is the valence of the post interaction of $c$.


## Let's define the class CompositeInteraction

In [33]:
class CompositeInteraction:
    """A composite interaction is a tuple (pre_interaction, post_interaction) and a weight

    The weight starts at 1 and is incremented by reinforce().
    """
    def __init__(self, pre_interaction, post_interaction):
        self.pre_interaction = pre_interaction
        self.post_interaction = post_interaction
        self.weight = 1

    def reinforce(self):
        """Increment the composite interaction's weight"""
        self.weight += 1

    def key(self):
        """ The key to find this interaction in the dictionary is the string '<pre_interaction><post_interaction>'. """
        return f"{self.pre_interaction}{self.post_interaction}"

    def __str__(self):
        """ Print the interaction in the form '(<pre_interaction>, <post_interaction>)' for debug """
        return f"({self.pre_interaction}, {self.post_interaction})"

    def __hash__(self):
        """ The hash is necessary to use interactions as keys in a dictionary """
        # __hash__ must return an int: returning the key string itself raises TypeError
        return hash(self.key())

    def __eq__(self, other):
        """ Composite interactions are equal if they have the same pre and post interactions """
        if isinstance(other, self.__class__):
            return (self.pre_interaction == other.pre_interaction) and (self.post_interaction == other.post_interaction)
        else:
            return False


## Create Agent5 by overriding the class Agent

You may add any attribute and method you deem useful to the class Agent5.

When selecting the next action, Agent5 must take into account the likelihood of each possible outcome and the valence of the predicted resulting interaction.

In [79]:
class Agent5(Agent):
    """Assignment placeholder: as written, Agent5 inherits every behavior of Agent unchanged."""
    pass
    # TODO override the method action(self, _outcome)

## Test your Agent5 in Environment1

In [None]:
# Run a fresh Agent5 for 20 cycles in Environment1
a = Agent5(interactions)
e = Environment1()
# Seed outcome handed to the first call of a.action(); it is not produced by the environment
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your Agent5 in Environment2

In [None]:
# Run a fresh Agent5 for 20 cycles in Environment2
a = Agent5(interactions)
e = Environment2()
# Seed outcome handed to the first call of a.action(); it is not produced by the environment
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your Agent5 in Environment3

In [None]:
# Run a fresh Agent5 for 20 cycles in Environment3
a = Agent5(interactions)
e = Environment3()
# Seed outcome handed to the first call of a.action(); it is not produced by the environment
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your Agent5 in Environment4

In [None]:
# Run a fresh Agent5 for 20 cycles in Environment4, whose behavior changes during the run
a = Agent5(interactions)
e = Environment4()
# Seed outcome handed to the first call of a.action(); it is not produced by the environment
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your Agent5 with interactions that have other valences

Replace the valences of interactions with your choice in the code below

In [None]:
# Choose different valence of interactions
# Interactions given as Interaction(action, outcome, valence).
# NOTE: this rebinds the notebook-level name `interactions` used by the other cells.
interactions = [
    Interaction(0,0,1),
    Interaction(0,1,0),
    Interaction(1,0,-1),
    Interaction(1,1,1),
    Interaction(2,0,-1),
    Interaction(2,1,1)
]
# Run the agent
a = Agent5(interactions)
e = Environment4()
# Seed outcome handed to the first call of a.action(); it is not produced by the environment
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your agent in the Turtle environment 

In [None]:
# @title Install the turtle environment
!pip3 install ColabTurtle
from ColabTurtle.Turtle import *

In [3]:
# @title Initialize the turtle environment

# Margin kept between the turtle and each edge of the window (same unit as xcor()/ycor())
BORDER_WIDTH = 20

class ColabTurtleEnvironment:
    """Environment in which actions move a turtle and bumping into the window border yields outcome 1.

    The drawing functions used here (bgcolor, goto, forward, left, ...) are not defined in
    this class; presumably they come from the ColabTurtle star import -- confirm.
    """

    def __init__(self):
        """ Creating the Turtle window """
        bgcolor("lightGray")
        penup()
        # Go to the center of the window without drawing
        goto(window_width() / 2, window_height()/2)
        face(0)
        pendown()
        color("green")

    def outcome(self, action):
        """ Enacting an action and returning the outcome

        The action is repeated in up to 10 small increments. Returns 1 if the turtle
        reached the border margin during one of them (the movement then stops early),
        and 0 otherwise. Actions other than 0, 1 and 2 do not move the turtle.
        """
        _outcome = 0
        for i in range(10):
            # _outcome = 0
            if action == 0:
                # move forward
                forward(10)
            elif action == 1:
                # rotate left
                left(4)
                forward(2)
            elif action == 2:
                # rotate right
                right(4)
                forward(2)

            # Bump on screen edge and return outcome 1
            # Each test below puts the turtle back on the margin line it crossed
            if xcor() < BORDER_WIDTH:
                goto(BORDER_WIDTH, ycor())
                _outcome = 1
            if xcor() > window_width() - BORDER_WIDTH:
                goto(window_width() - BORDER_WIDTH, ycor())
                _outcome = 1
            if ycor() < BORDER_WIDTH:
                goto(xcor(), BORDER_WIDTH)
                _outcome = 1
            if ycor() > window_height() - BORDER_WIDTH:
                goto(xcor(), window_height() -BORDER_WIDTH)
                _outcome = 1

            # Change color
            if _outcome == 0:
                color("green")
            else:
                # End the interaction
                color("red")
                # if action == 0:
                #     break
                # After a bump while turning, keep turning 10 more times by 4 in the same direction
                if action == 1:
                    for j in range(10):
                        left(4)
                elif action == 2:
                    for j in range(10):
                        right(4)
                break

        return _outcome

In [None]:
# @title Run the turtle environment
# Set up the turtle drawing area (function presumably provided by ColabTurtle -- confirm)
initializeTurtle()

# Parameterize the rendering
bgcolor("lightGray")
penup()
goto(window_width() / 2, window_height()/2)
face(0)
pendown()
color("green")
speed(10)

# Some valences to avoid bumping into walls
# Only (action 0, outcome 0) has a positive valence (3); (action 0, outcome 1) is the most
# negative (-3); actions 1 and 2 cost -1 and -2 whatever the outcome.
interactions = [
    Interaction(0,0,3),
    Interaction(0,1,-3),
    Interaction(1,0,-1),
    Interaction(1,1,-1),
    Interaction(2,0,-2),
    Interaction(2,1,-2)
]

a = Agent5(interactions)
e = ColabTurtleEnvironment()

# Seed outcome handed to the first call of a.action(); it is not produced by the environment
outcome = 0
for i in range(50):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Report 

Explain what you programmed and what results you observed. Export this document as PDF including your code, the traces you obtained, and your explanations below (no more than a few paragraphs):