[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PetiteIA/schema_mechanism/blob/master/notebooks/agent5.ipynb)

# THE AGENT WHO CHANGED HIS MIND

# Learning objectives

Upon completing this lab, you will be able to implement a developmental agent that reinforces simple behaviors.

## Define the Interaction class

Let's use the same interaction class as Agent4

In [1]:
class Interaction:
    """An interaction is a tuple (action, outcome) with a valence"""
    def __init__(self, action, outcome, valence):
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """ The key to find this interaction in the dictinary is the string '<action><outcome>'. """
        return f"{self.action}{self.outcome}"

    def __str__(self):
        """ Print interaction in the form '<action><outcome:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __eq__(self, other):
        """ Interactions are equal if they have the same key """
        return self.key() == other.key()

## Define the Agent class

The agent is initialized with the list of interactions 

On a new step _t+1_:
* The interaction enacted on step _t_ is memorized in `self._last_interaction`
* The interaction enacted on step _t-1_ is memorized in `self._previous_interaction`
* The intended interaction `(selected action, predicted outcome)` is memorized in `self._intended_interaction`

In [32]:
class Agent:
    """Creating our agent"""
    def __init__(self, _interactions):
        """ Initialize the dictionary of interactions"""
        self._interactions = {interaction.key(): interaction for interaction in _interactions}
        self._intended_interaction = self._interactions["20"]
        self._last_interaction = None
        self._previous_interaction = None


    def action(self, _outcome):
        """ Tracing the previous cycle """
        self._previous_interaction = self._last_interaction
        self._last_interaction = self._interactions[f"{self._intended_interaction.action}{_outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, "
              f"Outcome: {_outcome}, Prediction_correct: {self._intended_interaction.outcome == _outcome}, "
              f"Valence: {self._last_interaction.valence})")

        """ Computing the next interaction to try to enact """
        # TODO: Implement the agent's decision mechanism
        intended_action = 2
        # TODO: Implement the agent's prediction mechanism
        intended_outcome = 0
        # Memorize the intended interaction
        self._last_interaction = self._interactions[f"{intended_action}{intended_outcome}"]
        return intended_action


## Environment1 class

In [33]:
class Environment1:
    """ In Environment 1, action 2 yields outcome 0, action 3 yields outcome 1 """
    def outcome(self, _action):
        if _action == 0:
            return 0
        else:
            return 1

## Environment2 class

In [34]:
class Environment2:
    """ In Environment 2, action 2 yields outcome 1, action 3 yields outcome 0 """
    def outcome(self, _action):
        if _action == 0:
            return 1
        else:
            return 0

## Environment3 class

Environment 3 yields outcome 1 only when the agent alternates actions 0 and 1

In [35]:
class Environment3:
    """ Environment 3 yields outcome 1 only when the agent alternates actions 0 and 1 """
    def __init__(self):
        """ Initializing Environment3 """
        self.previous_action = 0

    def outcome(self, _action):
        if _action == self.previous_action:
            _outcome = 0
        else:
            _outcome = 1
        self.previous_action = _action
        return _outcome

## Environment4 class

Environment4 behaves like Environment1 during the first 10 cycles and then like Environment 2

In [36]:
class Environment4:
    """ Environm4 """
    def __init__(self):
        """ Initializing Environment4 """
        self.step = 0

    def outcome(self, _action):
        """Take the action and generate the next outcome """
        self.step += 1
        # Behave like environment1 during the first 10 steps
        if self.step < 10:
            if _action == 2:
                return 0
            else:
                return 1            
        # Behave like Environment2 after the first 10 steps
        else: 
            if _action == 2:
                return 1
            else:
                return 0            

## Initialize the interactions 

In [37]:
interactions = [
    Interaction(2,0,-1),
    Interaction(2,1,1),
    Interaction(3,0,-1),
    Interaction(3,1,1),
    Interaction(4,0,-1),
    Interaction(5,1,1)
]

Interactions are initialized with their action, their outcome, and their valence:

|| outcome 0 | outcome 1|
|---|---|---|
| action 2| -1 | 1 |
| action 3 | -1 | 1 |
| action 4 | -1 | 1 |

## Instantiate the agent

In [38]:
a = Agent(interactions)

## Instantiate the environment 

In [39]:
e = Environment4()

## Test run the simulation

In [40]:
outcome = 0
for i in range(30):
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
Action: 2, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)
Action: 2, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)
Action: 2, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)

# AGENT5 DNN

Implémentons l'Agent5 qui va prédire la probabilité de chaque outcome pour chaque action possible

## Créons le réseau de neurones

Le modèle a trois entrées: previous_action, previous_outcome, action. 

In [11]:
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 6)
        # Apply He Initialization recommended for ReLU
        nn.init.kaiming_normal_(self.fc1.weight, mode='fan_in', nonlinearity='relu')
        
        self.fc2 = nn.Linear(6, 2)
        # Apply Xavier initialisation recommended for linear activation
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.zeros_(self.fc2.bias)  # Biases are usually set to zero

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))  # Apply non-linearity
        return self.fc2(x)  # Logits (CrossEntropyLoss handles softmax)

model = Model()

# Définisson l'agent

In [106]:
import torch.optim as optim
import pandas as pd

class Agent5:
    """Creating our agent"""
    def __init__(self, _interactions):
        """ Initialize the dictionary of interactions"""
        # Initialise le réseau de neurone
        self._model = Model()
        self._criterion = nn.CrossEntropyLoss()  # Cross-entropy for classification
        self._optimizer = optim.SGD(model.parameters(), lr=0.3)  # SGD optimizer
        
        self._interactions = {interaction.key(): interaction for interaction in _interactions}
        self._intended_interaction = self._interactions["20"]
        self._last_interaction = None
        self._previous_interaction = None
        # Store the selection dataframe as a class attribute so we can display it in the notebook
        self.selection_df = None


    def fit(self, inputs, targets):
        """La fonction d'apprentissage"""
        input_tensor = torch.tensor(inputs, dtype=torch.float)
        # input_tensor = torch.randn_like(input_tensor) * 0.01 (voir si le modèle apprend des tendances)
        target_tensor = torch.tensor(targets, dtype=torch.long)
        labels = torch.nn.functional.one_hot(target_tensor, num_classes=2).to(torch.float)
        #labels = torch.argmax(target_tensor, dim=1)  # Convert one-hot to class indices

        optimizer.zero_grad()  # Reset gradients
        outputs = self._model(input_tensor)  # Forward pass
        loss = self._criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        self._optimizer.step()  # Update weights

        # Check accuracy (we expect 100% accuracy)
        predictions = torch.argmax(outputs, dim=1)
        accuracy = (predictions == target_tensor).float().mean().item()

        print(f"Loss: {loss.item():.6f}, Accuracy: {accuracy * 100:.0f}%")

    
    def predict(self, inputs):
        """La fonction de prediction"""
        input_tensor = torch.tensor(inputs, dtype=torch.float)
        outputs = self._model(input_tensor)
        print("prediction", torch.argmax(outputs, dim=1))
        return torch.softmax(outputs, dim=1) 
        
    
    def action(self, _outcome):
        """ Tracing the previous cycle """
        self._previous_interaction = self._last_interaction
        self._last_interaction = self._interactions[f"{self._intended_interaction.action}{_outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, "
              f"Outcome: {_outcome}, Prediction_correct: {self._intended_interaction.outcome == _outcome}, "
              f"Valence: {self._last_interaction.valence})")

        """ Computing the next interaction to try to enact """
        # Entraine le réseau de neurone avec les informations du dernier cycle d'interaction
        if self._previous_interaction is not None:
            self.fit([[self._previous_interaction.action, self._previous_interaction.outcome, self._intended_interaction.action]], [outcome])

        # Prédit les résultats pour les différentes actions
        probabilities = self.predict([[self._intended_interaction.action, outcome, 2], [self._intended_interaction.action, outcome, 3]])
        # print(predictions)
        
        # TODO: Implement the agent's decision mechanism
        # Le dataframe pour trouver la meilleure expected valence
        data = {'action': [2, 2, 3, 3],
                'outcome': [0, 1, 0, 1],
                'valence': [self._interactions[i].valence for i in ["20", "21", "30", "31"]],
                'probability': probabilities.flatten().tolist()}
        self.selection_df = pd.DataFrame(data)
        self.selection_df['expected_valence'] = self.selection_df['valence'] * self.selection_df['probability']
        print(self.selection_df)

        # Aggregate by action
        grouped_df = self.selection_df.groupby('action').agg({'expected_valence': 'sum'}).reset_index()
        # Sort by descending order of expected valence
        grouped_df = grouped_df.sort_values(by=['expected_valence'], ascending=[False]).reset_index(drop=True)
        print(grouped_df)
        # Select the action that has the higest expected valence
        intended_action = grouped_df.loc[0, 'action']

        # TODO: Implement the agent's prediction mechanism
        predictions = torch.argmax(probabilities, dim=1)
        intended_outcome = predictions.tolist()[intended_action - 2]
        
        # Memorize the intended interaction
        self._intended_interaction = self._interactions[f"{intended_action}{intended_outcome}"]
        return intended_action


## Test your Agent5 in Environment1

In [107]:
torch.manual_seed(0)

a = Agent5(interactions)
e = Environment1()
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
prediction tensor([0, 0])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.860700         -0.860700
1       2        1        1     0.139300          0.139300
2       3        0       -1     0.759602         -0.759602
3       3        1        1     0.240398          0.240398
   action  expected_valence
0       3         -0.519203
1       2         -0.721401
Action: 3, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)
Loss: 1.425457, Accuracy: 0%
prediction tensor([0, 0])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.978569         -0.978569
1       2        1        1     0.021431          0.021431
2       3        0       -1     0.958934         -0.958934
3       3        1        1     0.041066          0.041066
   action  expected_valence
0       3         -0.917868
1       2         -0.957138
Action:

## Test your Agent5 in Environment2

In [101]:
a = Agent5(interactions)
e = Environment2()
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.397219         -0.397219
1       2        1        1     0.602781          0.602781
2       3        0       -1     0.380046         -0.380046
3       3        1        1     0.619954          0.619954
   action  expected_valence
0       3          0.239907
1       2          0.205563
Action: 3, Prediction: 1, Outcome: 0, Prediction_correct: False, Valence: -1)
Loss: 0.967462, Accuracy: 0%
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.364659         -0.364659
1       2        1        1     0.635341          0.635341
2       3        0       -1     0.348082         -0.348082
3       3        1        1     0.651918          0.651918
   action  expected_valence
0       3          0.303837
1       2          0.270682
Action

## Test your Agent5 in Environment3

In [102]:
a = Agent5(interactions)
e = Environment3()
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
prediction tensor([0, 0])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.980421         -0.980421
1       2        1        1     0.019579          0.019579
2       3        0       -1     0.991958         -0.991958
3       3        1        1     0.008042          0.008042
   action  expected_valence
0       2         -0.960842
1       3         -0.983917
Action: 2, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)
Loss: 3.933289, Accuracy: 0%
prediction tensor([0, 0])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.907749         -0.907749
1       2        1        1     0.092251          0.092251
2       3        0       -1     0.960380         -0.960380
3       3        1        1     0.039620          0.039620
   action  expected_valence
0       2         -0.815497
1       3         -0.920760
Action:

## Test your Agent5 in Environment4

In [None]:
a = Agent5(interactions)
e = Environment4()
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your Agent5 with interactions that have other valences

Replace the valences of interactions with your choice in the code below

In [None]:
# Choose different valence of interactions
interactions = [
    Interaction(0,0,1),
    Interaction(0,1,0),
    Interaction(1,0,-1),
    Interaction(1,1,1),
    Interaction(2,0,-1),
    Interaction(2,1,1)
]
# Run the agent
a = Agent5(interactions)
e = Environment4()
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your agent in the Turtle environment 

In [None]:
# @title Install the turtle environment
!pip3 install ColabTurtle
from ColabTurtle.Turtle import *

In [3]:
# @title Initialize the turtle environment

BORDER_WIDTH = 20

class ColabTurtleEnvironment:

    def __init__(self):
        """ Creating the Turtle window """
        bgcolor("lightGray")
        penup()
        goto(window_width() / 2, window_height()/2)
        face(0)
        pendown()
        color("green")

    def outcome(self, action):
        """ Enacting an action and returning the outcome """
        _outcome = 0
        for i in range(10):
            # _outcome = 0
            if action == 0:
                # move forward
                forward(10)
            elif action == 1:
                # rotate left
                left(4)
                forward(2)
            elif action == 2:
                # rotate right
                right(4)
                forward(2)

            # Bump on screen edge and return outcome 1
            if xcor() < BORDER_WIDTH:
                goto(BORDER_WIDTH, ycor())
                _outcome = 1
            if xcor() > window_width() - BORDER_WIDTH:
                goto(window_width() - BORDER_WIDTH, ycor())
                _outcome = 1
            if ycor() < BORDER_WIDTH:
                goto(xcor(), BORDER_WIDTH)
                _outcome = 1
            if ycor() > window_height() - BORDER_WIDTH:
                goto(xcor(), window_height() -BORDER_WIDTH)
                _outcome = 1

            # Change color
            if _outcome == 0:
                color("green")
            else:
                # Finit l'interaction
                color("red")
                # if action == 0:
                #     break
                if action == 1:
                    for j in range(10):
                        left(4)
                elif action == 2:
                    for j in range(10):
                        right(4)
                break

        return _outcome

In [None]:
# @title Run the turtle environment
initializeTurtle()

# Parameterize the rendering
bgcolor("lightGray")
penup()
goto(window_width() / 2, window_height()/2)
face(0)
pendown()
color("green")
speed(10)

# Some valences to avoid bumping into walls
interactions = [
    Interaction(0,0,3),
    Interaction(0,1,-3),
    Interaction(1,0,-1),
    Interaction(1,1,-1),
    Interaction(2,0,-2),
    Interaction(2,1,-2)
]

a = Agent5(interactions)
e = ColabTurtleEnvironment()

outcome = 0
for i in range(50):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Report 

Explain what you programmed and what results you observed. Export this document as PDF including your code, the traces you obtained, and your explanations below (no more than a few paragraphs):