[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PetiteIA/schema_mechanism/blob/master/notebooks/agent5.ipynb)

# THE AGENT WHO CHANGED HIS MIND

# Learning objectives

Upon completing this lab, you will be able to implement a developmental agent that reinforces simple behaviors.

## Define the Interaction class

Let's use the same Interaction class as in Agent4.

In [62]:
class Interaction:
    """An interaction is a tuple (action, outcome) with a valence.

    Two interactions are equal when they have the same key, i.e. the same
    action and outcome; the valence does not take part in the comparison.
    """
    def __init__(self, action, outcome, valence):
        """Store the action, the outcome, and the valence of the interaction."""
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """Return the key to find this interaction in the dictionary: the string '<action><outcome>'."""
        return f"{self.action}{self.outcome}"

    def __str__(self):
        """Return the interaction in the form '<action><outcome>:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __eq__(self, other):
        """Interactions are equal if they have the same key."""
        # Defer to Python's default handling for foreign types instead of
        # crashing on a missing key() method
        if not isinstance(other, Interaction):
            return NotImplemented
        return self.key() == other.key()

    def __hash__(self):
        """Hash on the key so that equal interactions hash alike.

        Defining __eq__ alone would make instances unhashable. The hash follows
        action and outcome, so do not mutate them while the interaction is
        stored in a set or used as a dictionary key.
        """
        return hash(self.key())

## Environment1 class

In [63]:
class Environment1:
    """Environment 1: action 2 yields outcome 0, any other action (e.g. 3) yields outcome 1."""
    def outcome(self, _action):
        """Return the outcome produced by _action."""
        return 0 if _action == 2 else 1

## Environment2 class

In [64]:
class Environment2:
    """Environment 2: action 2 yields outcome 1, any other action (e.g. 3) yields outcome 0."""
    def outcome(self, _action):
        """Return the outcome produced by _action."""
        return 1 if _action == 2 else 0

## Environment3 class

Environment 3 yields outcome 1 only when the agent's action differs from its previous action, that is, when the agent alternates between two actions.

In [65]:
class Environment3:
    """Environment 3 yields outcome 1 only when the action differs from the previous one.

    The previous action starts at 0, so the very first action is compared to 0.
    """
    def __init__(self):
        """Remember the last action received; it starts at 0."""
        self.previous_action = 0

    def outcome(self, _action):
        """Return 1 if _action differs from the previously received action, else 0."""
        repeated = _action == self.previous_action
        # Record the action before returning so the next call compares against it
        self.previous_action = _action
        return 0 if repeated else 1

## Environment4 class

Environment4 behaves like Environment1 during the first 10 cycles and then like Environment 2

In [66]:
class Environment4:
    """Environment 4 behaves like Environment1 during the first 10 steps, then like Environment2.

    During the first 10 calls to outcome(), action 2 yields outcome 0 and any
    other action yields outcome 1. From the 11th call on, action 2 yields
    outcome 1 and any other action yields outcome 0.
    """
    # Number of initial steps during which the environment behaves like Environment1
    SWITCH_STEP = 10

    def __init__(self):
        """Initialize the step counter."""
        self.step = 0

    def outcome(self, _action):
        """Take the action and generate the next outcome."""
        self.step += 1
        # The counter is incremented before the test, so the comparison must be
        # inclusive for steps 1..10 to all behave like Environment1
        if self.step <= self.SWITCH_STEP:
            return 0 if _action == 2 else 1
        # Behave like Environment2 after the first 10 steps
        return 1 if _action == 2 else 0

## Initialize the interactions 

In [67]:
# One entry per (action, outcome) pair, written Interaction(action, outcome, valence).
# For every action, outcome 0 has valence -1 and outcome 1 has valence 1.
interactions = [
    Interaction(2, 0, -1),
    Interaction(2, 1, 1),
    Interaction(3, 0, -1),
    Interaction(3, 1, 1),
    Interaction(4, 0, -1),
    Interaction(4, 1, 1),  # action 4 with outcome 1, so that action 4 has both outcomes
]

Interactions are initialized with their action, their outcome, and their valence:

|| outcome 0 | outcome 1|
|---|---|---|
| action 2| -1 | 1 |
| action 3 | -1 | 1 |
| action 4 | -1 | 1 |

# AGENT5 DNN

Implémentons l'Agent5 qui va prédire la probabilité de chaque outcome pour chaque action possible

## Créons le réseau de neurones

Le modèle a trois entrées: previous_action, previous_outcome, action. 

In [68]:
import torch
import torch.nn as nn

class Model(nn.Module):
    """Two-layer perceptron: 3 inputs -> 6 hidden units (ReLU) -> 2 output logits."""
    def __init__(self):
        super().__init__()
        # Hidden layer, with He initialization as recommended for ReLU
        hidden_layer = nn.Linear(3, 6)
        nn.init.kaiming_normal_(hidden_layer.weight, mode='fan_in', nonlinearity='relu')
        self.fc1 = hidden_layer

        # Output layer, with Xavier initialization as recommended for linear
        # activation, and biases set to zero
        output_layer = nn.Linear(6, 2)
        nn.init.xavier_uniform_(output_layer.weight)
        nn.init.zeros_(output_layer.bias)
        self.fc2 = output_layer

    def forward(self, x):
        """Return the raw logits for x; no softmax is applied here."""
        hidden_activation = torch.relu(self.fc1(x))  # Non-linearity
        return self.fc2(hidden_activation)


## Définissons l'agent

In [73]:
import torch.optim as optim
import pandas as pd

class Agent5:
    """Agent that predicts outcomes with a neural network and picks the best expected valence.

    At each cycle the agent trains its model on the last observed transition
    (previous action, previous outcome, action) -> outcome, predicts the
    probability of each outcome for each candidate action, and selects the
    action whose expected valence is the highest.
    """
    def __init__(self, _interactions, _actions=(2, 3)):
        """Initialize the model and the dictionary of interactions.

        :param _interactions: iterable of Interaction objects; it must contain
            an interaction for each candidate action with outcomes 0 and 1.
        :param _actions: the candidate actions the agent chooses from.
            Defaults to (2, 3).
        """
        # Initialize the neural network
        self._model = Model()
        self._criterion = nn.CrossEntropyLoss()  # Cross-entropy for classification
        self._optimizer = optim.SGD(self._model.parameters(), lr=0.3)  # SGD optimizer

        self._interactions = {interaction.key(): interaction for interaction in _interactions}
        self._actions = list(_actions)
        # The first intended interaction is the first candidate action with predicted outcome 0
        self._intended_interaction = self._interactions[f"{self._actions[0]}0"]
        self._last_interaction = None
        self._previous_interaction = None
        # Store the selection dataframe as a class attribute so we can display it in the notebook
        self.selection_df = None

    def fit(self, inputs, targets):
        """Run one training step on the model and print the loss and accuracy.

        :param inputs: list of [previous_action, previous_outcome, action] rows.
        :param targets: list of observed outcomes (0 or 1), one per input row.
        """
        input_tensor = torch.tensor(inputs, dtype=torch.float)
        target_tensor = torch.tensor(targets, dtype=torch.long)
        # The loss receives the targets as one-hot class probabilities
        labels = torch.nn.functional.one_hot(target_tensor, num_classes=2).to(torch.float)

        self._optimizer.zero_grad()  # Reset gradients
        outputs = self._model(input_tensor)  # Forward pass
        loss = self._criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        self._optimizer.step()  # Update weights

        # Accuracy of the forward pass above, i.e. before this weight update
        predictions = torch.argmax(outputs, dim=1)
        accuracy = (predictions == target_tensor).float().mean().item()

        print(f"Loss: {loss.item():.6f}, Accuracy: {accuracy * 100:.0f}%")

    def predict(self, inputs):
        """Return the predicted probability of each outcome for each input row.

        :param inputs: list of [previous_action, previous_outcome, action] rows.
        :return: tensor with one row per input and one column per outcome (0, 1).
        """
        input_tensor = torch.tensor(inputs, dtype=torch.float)
        # Inference only: no gradient is needed here
        with torch.no_grad():
            outputs = self._model(input_tensor)
        print("prediction", torch.argmax(outputs, dim=1))
        return torch.softmax(outputs, dim=1)

    def action(self, _outcome):
        """Record the outcome of the last action, learn from it, and choose the next action.

        :param _outcome: the outcome (0 or 1) obtained from the previously returned action.
        :return: the next action to enact, as an int.
        """
        # Tracing the previous cycle
        self._previous_interaction = self._last_interaction
        self._last_interaction = self._interactions[f"{self._intended_interaction.action}{_outcome}"]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, "
              f"Outcome: {_outcome}, Prediction_correct: {self._intended_interaction.outcome == _outcome}, "
              f"Valence: {self._last_interaction.valence})")

        # Computing the next interaction to try to enact.
        # Train the network on the last interaction cycle. Use the _outcome
        # parameter, not a global variable that happens to be named `outcome`.
        if self._previous_interaction is not None:
            self.fit([[self._previous_interaction.action, self._previous_interaction.outcome,
                       self._intended_interaction.action]], [_outcome])

        # Predict the outcome probabilities for each candidate action
        probabilities = self.predict([[self._intended_interaction.action, _outcome, candidate]
                                      for candidate in self._actions])

        # The dataframe to find the best expected valence: one row per
        # (action, outcome) pair, in the same order as the flattened probabilities
        pairs = [(candidate, result) for candidate in self._actions for result in (0, 1)]
        data = {'action': [candidate for candidate, _ in pairs],
                'outcome': [result for _, result in pairs],
                'valence': [self._interactions[f"{candidate}{result}"].valence for candidate, result in pairs],
                'probability': probabilities.flatten().tolist()}
        self.selection_df = pd.DataFrame(data)
        self.selection_df['expected_valence'] = self.selection_df['valence'] * self.selection_df['probability']
        print(self.selection_df)

        # Aggregate by action
        grouped_df = self.selection_df.groupby('action').agg({'expected_valence': 'sum'}).reset_index()
        # Sort by descending order of expected valence
        grouped_df = grouped_df.sort_values(by=['expected_valence'], ascending=[False]).reset_index(drop=True)
        print(grouped_df)
        # Select the action that has the highest expected valence
        intended_action = int(grouped_df.loc[0, 'action'])

        # Predict the most probable outcome of the selected action
        predictions = torch.argmax(probabilities, dim=1)
        intended_outcome = predictions.tolist()[self._actions.index(intended_action)]

        # Memorize the intended interaction
        self._intended_interaction = self._interactions[f"{intended_action}{intended_outcome}"]
        return intended_action


## Test your Agent5 in Environment1

In [74]:
# Seed PyTorch's random number generator before the agent creates its model
torch.manual_seed(42)

a = Agent5(interactions)
e = Environment1()
# The first call to action() needs an outcome: start with outcome 0
outcome = 0
for i in range(20):
    # The agent receives the last outcome and returns the next action;
    # the environment then returns the outcome of that action
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
prediction tensor([0, 0])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.783214         -0.783214
1       2        1        1     0.216786          0.216786
2       3        0       -1     0.759586         -0.759586
3       3        1        1     0.240414          0.240414
   action  expected_valence
0       3         -0.519171
1       2         -0.566429
Action: 3, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: 1)
Loss: 1.425392, Accuracy: 0%
prediction tensor([1, 1])
   action  outcome  valence   probability  expected_valence
0       2        0       -1  6.636618e-07     -6.636618e-07
1       2        1        1  9.999993e-01      9.999993e-01
2       3        0       -1  2.795956e-10     -2.795956e-10
3       3        1        1  1.000000e+00      1.000000e+00
   action  expected_valence
0       3          1.000000
1       2          0.999999
Ac

In [71]:
# Seed PyTorch's random number generator before the agent creates its model
torch.manual_seed(42)

a = Agent5(interactions)
# One training step on a single sample: the input row is
# [previous_action, previous_outcome, action] and the target is outcome 1
a.fit([[3, 0, 3]], [1])

Loss: 1.187031, Accuracy: 25%
Loss: 0.000007, Accuracy: 100%


## Test your Agent5 in Environment2

In [75]:
# No manual seed is set in this cell
a = Agent5(interactions)
e = Environment2()
# The first call to action() needs an outcome: start with outcome 0
outcome = 0
for i in range(20):
    # The agent receives the last outcome and returns the next action;
    # the environment then returns the outcome of that action
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.169938         -0.169938
1       2        1        1     0.830062          0.830062
2       3        0       -1     0.215397         -0.215397
3       3        1        1     0.784603          0.784603
   action  expected_valence
0       2          0.660124
1       3          0.569206
Action: 2, Prediction: 1, Outcome: 1, Prediction_correct: True, Valence: 1)
Loss: 0.186255, Accuracy: 100%
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.052126         -0.052126
1       2        1        1     0.947874          0.947874
2       3        0       -1     0.038383         -0.038383
3       3        1        1     0.961617          0.961617
   action  expected_valence
0       3          0.923234
1       2          0.895748
Action

## Test your Agent5 in Environment3

In [76]:
# No manual seed is set in this cell
a = Agent5(interactions)
e = Environment3()
# The first call to action() needs an outcome: start with outcome 0
outcome = 0
# This run lasts 100 cycles
for i in range(100):
    # The agent receives the last outcome and returns the next action;
    # the environment then returns the outcome of that action
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.292750         -0.292750
1       2        1        1     0.707250          0.707250
2       3        0       -1     0.086159         -0.086159
3       3        1        1     0.913841          0.913841
   action  expected_valence
0       3          0.827683
1       2          0.414500
Action: 3, Prediction: 1, Outcome: 1, Prediction_correct: True, Valence: 1)
Loss: 0.090098, Accuracy: 100%
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.227897         -0.227897
1       2        1        1     0.772103          0.772103
2       3        0       -1     0.034054         -0.034054
3       3        1        1     0.965946          0.965946
   action  expected_valence
0       3          0.931892
1       2          0.544207
Action

## Test your Agent5 in Environment4

In [77]:
# No manual seed is set in this cell
a = Agent5(interactions)
e = Environment4()
# The first call to action() needs an outcome: start with outcome 0
outcome = 0
# 20 cycles, so the run covers the step at which Environment4 changes its behavior
for i in range(20):
    # The agent receives the last outcome and returns the next action;
    # the environment then returns the outcome of that action
    action = a.action(outcome)
    outcome = e.outcome(action)

Action: 2, Prediction: 0, Outcome: 0, Prediction_correct: True, Valence: -1)
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.027986         -0.027986
1       2        1        1     0.972014          0.972014
2       3        0       -1     0.006208         -0.006208
3       3        1        1     0.993792          0.993792
   action  expected_valence
0       3          0.987585
1       2          0.944029
Action: 3, Prediction: 1, Outcome: 1, Prediction_correct: True, Valence: 1)
Loss: 0.006227, Accuracy: 100%
prediction tensor([1, 1])
   action  outcome  valence  probability  expected_valence
0       2        0       -1     0.021104         -0.021104
1       2        1        1     0.978896          0.978896
2       3        0       -1     0.004538         -0.004538
3       3        1        1     0.995462          0.995462
   action  expected_valence
0       3          0.990924
1       2          0.957791
Action

## Test your Agent5 with interactions that have other valences

Replace the valences of interactions with your choice in the code below

In [None]:
# Choose different valences for the interactions.
# Keep actions 2 and 3: Agent5 looks up the interactions "20", "21", "30" and "31",
# so a list without action 3 would raise a KeyError in Agent5.action().
interactions = [
    Interaction(2, 0, 1),
    Interaction(2, 1, 0),
    Interaction(3, 0, -1),
    Interaction(3, 1, 1),
]
# Run the agent
a = Agent5(interactions)
e = Environment4()
# The first call to action() needs an outcome: start with outcome 0
outcome = 0
for i in range(20):
    action = a.action(outcome)
    outcome = e.outcome(action)

## Test your agent in the Turtle environment 

In [None]:
# @title Install the turtle environment
!pip3 install ColabTurtle
from ColabTurtle.Turtle import *

In [3]:
# @title Initialize the turtle environment

# Margin kept between the turtle and each edge of the window, in the
# units returned by xcor() / ycor()
BORDER_WIDTH = 20

class ColabTurtleEnvironment:
    """Environment in which actions move a turtle and outcome 1 means it hit the window border."""

    def __init__(self):
        """ Creating the Turtle window """
        bgcolor("lightGray")
        # Move to the center of the window without drawing
        penup()
        goto(window_width() / 2, window_height()/2)
        face(0)
        pendown()
        color("green")

    def outcome(self, action):
        """ Enacting an action and returning the outcome

        The action is repeated for up to 10 sub-steps: action 0 moves forward,
        action 1 rotates left and advances, action 2 rotates right and advances.
        Any other action leaves the turtle in place. Returns 1 if the turtle
        crossed the border margin during the action (the action then stops
        early), otherwise 0.
        """
        _outcome = 0
        for i in range(10):
            # _outcome = 0
            if action == 0:
                # move forward
                forward(10)
            elif action == 1:
                # rotate left
                left(4)
                forward(2)
            elif action == 2:
                # rotate right
                right(4)
                forward(2)

            # Bump on screen edge and return outcome 1
            # Each check puts the turtle back on the margin line it crossed
            if xcor() < BORDER_WIDTH:
                goto(BORDER_WIDTH, ycor())
                _outcome = 1
            if xcor() > window_width() - BORDER_WIDTH:
                goto(window_width() - BORDER_WIDTH, ycor())
                _outcome = 1
            if ycor() < BORDER_WIDTH:
                goto(xcor(), BORDER_WIDTH)
                _outcome = 1
            if ycor() > window_height() - BORDER_WIDTH:
                goto(xcor(), window_height() -BORDER_WIDTH)
                _outcome = 1

            # Change color
            if _outcome == 0:
                color("green")
            else:
                # End the interaction
                color("red")
                # if action == 0:
                #     break
                # After a bump, a rotating action keeps turning in place for
                # 10 more increments of 4 in the same direction
                if action == 1:
                    for j in range(10):
                        left(4)
                elif action == 2:
                    for j in range(10):
                        right(4)
                # Stop the remaining sub-steps as soon as the turtle has bumped
                break

        return _outcome

In [None]:
# @title Run the turtle environment
initializeTurtle()

# Parameterize the rendering
# These calls are the same as those in ColabTurtleEnvironment.__init__, plus speed()
bgcolor("lightGray")
penup()
goto(window_width() / 2, window_height()/2)
face(0)
pendown()
color("green")
speed(10)

# Some valences to avoid bumping into walls
# NOTE(review): this list defines actions 0, 1 and 2, while Agent5.action()
# evaluates actions 2 and 3 and looks up the keys "30" and "31", which are
# absent here. Confirm that Agent5 is adapted to this action set before
# running this cell.
interactions = [
    Interaction(0,0,3),
    Interaction(0,1,-3),
    Interaction(1,0,-1),
    Interaction(1,1,-1),
    Interaction(2,0,-2),
    Interaction(2,1,-2)
]

a = Agent5(interactions)
e = ColabTurtleEnvironment()

# The first call to action() needs an outcome: start with outcome 0
outcome = 0
for i in range(50):
    # The agent receives the last outcome and returns the next action;
    # the environment moves the turtle and returns the outcome
    action = a.action(outcome)
    outcome = e.outcome(action)

## Report 

Explain what you programmed and what results you observed. Export this document as PDF including your code, the traces you obtained, and your explanations below (no more than a few paragraphs):