[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PetiteIA/schema_mechanism/blob/master/notebooks/agent5-DNN.ipynb)

# UTILISATION D'UNE LSTM POUR GENERER UNE SEQUENCE DE DEUX INTERACTIONS

Ce notebook présente notre troisième agent doté d'un LSTM. 
Nous définissons l'embedding des interactions dans la classe Interaction : une dimension pour l'action et une dimension pour l'outcome.


# La classe Interaction

On définit l'embedding de chaque interaction.

In [53]:
# Radix used to pack an (action, outcome) pair into a single integer key
BASE_ACTION = 2


class Interaction:
    """An interaction is a tuple (action, outcome) with a valence.

    Two interactions compare equal (and hash identically) when they share the
    same key, i.e. the same action and outcome; the valence is ignored.
    """

    def __init__(self, action, outcome, valence):
        """Store the action, the outcome and the valence of this interaction."""
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """Return the integer key used to find this interaction in the dictionary."""
        return self.action * BASE_ACTION + self.outcome

    def __str__(self):
        """Return the interaction in the form '<action><outcome>:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __eq__(self, other):
        """Interactions are equal if they have the same key.

        Objects that do not expose a ``key()`` method are not comparable:
        ``NotImplemented`` is returned so that Python falls back to its default
        handling instead of raising ``AttributeError``.
        """
        other_key = getattr(other, "key", None)
        if not callable(other_key):
            return NotImplemented
        return self.key() == other_key()

    def __hash__(self):
        """Hash on the key so that equal interactions share the same hash.

        Defining ``__eq__`` alone would make instances unhashable.
        """
        return hash(self.key())

    def embedding(self):
        """Return the list [action, outcome] used as the embedding of this interaction."""
        return [self.action, self.outcome]

On définit les actions et les outcomes chacun dans l'intervalle [0, 1], car c'est sans doute préférable quand on les utilise comme embedding.

In [338]:
# Identifiers of the two actions available to the agent.
# Environment6.outcome treats ACTION0 as "move left" and any other action as "move right".
ACTION0 = 0
ACTION1 = 1
# Identifiers of the two outcomes the environment can return.
# Environment6.outcome returns OUTCOME1 only on the first bump against a light green wall.
OUTCOME0 = 0
OUTCOME1 = 1

# L'environnement

L'agent a deux actions possibles: aller à droite ou aller à gauche.

L'environnement renvoie 1 la première fois que l'agent se cogne dans le mur quand celui-ci est vert clair, et le mur devient alors vert foncé. 
Tant que le mur est vert foncé, l'environnement renvoie 0, jusqu'à ce que l'agent s'éloigne et que le mur redevienne vert clair. 

In [433]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import Output
from IPython.display import display

# Cell values of a wall: light green (not bumped yet) and dark green (already bumped)
GREEN = 1
DARK = 2


class Environment6:
    """One-row grid of four cells with a wall at each end (columns 0 and 3).

    The agent stands in column 1 or 2. Bumping into a light green wall returns
    OUTCOME1 and turns that wall dark; a dark wall returns OUTCOME0 until the
    agent steps away, which turns the wall it left behind light green again.
    """

    def __init__(self):
        """Create the grid with both walls light green and the agent in column 1."""
        self.position = 1
        self.grid = np.array([[GREEN, 0, 0, GREEN]])

    def outcome(self, _action):
        """Enact the action and return the resulting outcome.

        ACTION0 moves left; any other action moves right.
        """
        # Describe the move: whether a step is possible, its direction, the wall
        # the agent is heading to and the wall it is moving away from.
        if _action == ACTION0:
            can_step, step, facing_wall, opposite_wall = self.position > 1, -1, 0, 3
        else:
            can_step, step, facing_wall, opposite_wall = self.position < 2, 1, 3, 0

        if can_step:
            # No bump: move and reset the wall on the other side to light green
            self.position += step
            self.grid[0, opposite_wall] = GREEN
            return OUTCOME0

        if self.grid[0, facing_wall] == GREEN:
            # First bump: the wall turns dark green
            self.grid[0, facing_wall] = DARK
            return OUTCOME1

        # Subsequent bumps against a wall that is already dark
        return OUTCOME0

    def display(self):
        """Draw the grid and the agent's position in the global output widget ``out``."""
        out.clear_output(wait=True)
        with out:
            _, axes = plt.subplots()
            # Hide the ticks on both axes
            axes.set_xticks([])
            axes.set_yticks([])
            # Draw the grid, then the agent as a dot at its current column
            axes.imshow(self.grid, cmap='Greens', vmin=0, vmax=2)
            plt.scatter(self.position, 0, s=1000)
            plt.show()

# AGENT LSTM

Implémentons l'Agent3 qui va prédire la probabilité des prochains tokens d'une séquence

## Créons le modèle de LSTM

Le modèle a deux entrées: previous_interaction, last_interaction. 

In [697]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    """Single-layer LSTM followed by a linear layer that scores the 4 interactions.

    The inputs are sequences of 2-value interaction embeddings, fed to the LSTM
    directly (no embedding layer). The module owns its optimizer and loss.
    """

    def __init__(self):
        """Build the layers, the optimizer and the loss, then initialize the weights."""
        super().__init__()
        self.len_vocab = 4
        self.num_layers = 1
        self.hidden_size = 64

        # Each interaction is already a 2-value vector, so nn.Embedding is not used
        embedding_dim = 2

        # Recurrent layer; inputs are laid out as (batch, sequence, feature)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )

        # Output layer mapping the hidden state to one logit per interaction
        self.fc_out = nn.Linear(self.hidden_size, self.len_vocab)

        self._optimizer = torch.optim.Adam(self.parameters(), lr=0.001, weight_decay=0.0001)
        self._loss_func = nn.CrossEntropyLoss()

        # Manual initialization of the LSTM weights and biases
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param)
            elif 'weight_ih' in name:
                nn.init.xavier_uniform_(param)

        # Constant initialization of the fully connected layer
        nn.init.constant_(self.fc_out.weight, 0.5)
        nn.init.constant_(self.fc_out.bias, 0.1)

    def forward(self, input_seq, hidden_in, mem_in):
        """Run the sequence through the LSTM and the output layer.

        Returns:
            A tuple (logits for every time step, final hidden state, final cell state).
        """
        # The raw embeddings are used as float features
        features = input_seq.type(torch.float32)
        lstm_out, (hidden_out, mem_out) = self.lstm(features, (hidden_in, mem_in))
        logits = self.fc_out(lstm_out)
        return logits, hidden_out, mem_out

    def fit(self, inputs, targets):
        """Train for 20 epochs on the whole set of sequences as a single batch.

        Args:
            inputs: nested list of embedded sequences (one sequence per sample).
            targets: list with the class index expected after each sequence.
        """
        input_tensor = torch.tensor(inputs)
        labels = torch.tensor(targets)
        state_shape = (self.num_layers, input_tensor.shape[0], self.hidden_size)

        # Training mode
        self.train()
        for epoch in range(20):
            # Fresh zero hidden and memory states for every epoch
            hidden = torch.zeros(*state_shape, device="cpu")
            memory = torch.zeros(*state_shape, device="cpu")

            pred, _, _ = self(input_tensor, hidden, memory)
            # Only the prediction after the last element of the sequence is trained
            last_step = pred[:, -1, :]
            loss = self._loss_func(last_step, labels)

            # Backpropagation and optimization
            self._optimizer.zero_grad()
            loss.backward()
            self._optimizer.step()

            # Number of correct predictions in this epoch
            train_acc = (last_step.argmax(1) == labels).sum()
        # Report the accuracy of the last epoch only
        print(f"acc : {train_acc/len(labels):.3f} = {train_acc}/{len(labels)} for epoch {epoch}")

    def predict(self, sequence):
        """Return, for each action, the probability of each of its outcomes.

        The logits after the last element of the first sequence are grouped in
        pairs and a softmax is applied within each pair, so every consecutive
        pair of returned probabilities sums to 1.
        """
        context = torch.tensor(sequence, dtype=torch.int)
        state_shape = (self.num_layers, context.shape[0], self.hidden_size)
        h0 = torch.zeros(state_shape, device="cpu")
        c0 = torch.zeros(state_shape, device="cpu")

        # No gradient computation in prediction mode
        with torch.no_grad():
            logits, _, _ = self(context, h0, c0)

        per_action = logits[0, -1, :].reshape(-1, 2)
        return nn.functional.softmax(per_action, dim=1).flatten().tolist()
    

# Définissons l'agent

In [698]:
import torch.optim as optim
import pandas as pd

class Agent:
    """Agent that records sequences of interactions, trains an LSTM on them and
    selects its next action from the predicted expected valences."""

    def __init__(self, _interactions):
        """Initialize the model, the dictionary of interactions and the sequence memory.

        Args:
            _interactions: iterable of interactions exposing ``key()``, ``action``,
                ``outcome``, ``valence`` and ``embedding()``.
        """
        # Neural network used to predict the next interaction
        self._model = LSTM()

        self._interactions = {interaction.key(): interaction for interaction in _interactions}
        # The first declared interaction is the initial intention
        self._intended_interaction = list(self._interactions.values())[0]
        self._last_interaction = None
        self._previous_interaction = None
        self._penultimate_interaction = None
        # Dataframe that memorizes the sequences of three interactions
        self.sequences_df = pd.DataFrame({
            'i1': pd.Series(dtype='int'),
            'i2': pd.Series(dtype='int'),
            'i3': pd.Series(dtype='int'),
            'action': pd.Series(dtype='int'),
            'valence': pd.Series(dtype='int'),
            'count': pd.Series(dtype='int'),
            'proclivity': pd.Series(dtype='int'),
        })

    def action(self, _outcome):
        """Process the outcome of the previous action and return the next action.

        Args:
            _outcome: outcome returned by the environment for the last action.

        Returns:
            The action of the newly intended interaction.
        """
        # Trace the previous cycle: shift the history and record the enacted interaction
        self._penultimate_interaction = self._previous_interaction
        self._previous_interaction = self._last_interaction
        # Same key formula as Interaction.key(): action * BASE_ACTION + outcome
        self._last_interaction = self._interactions[self._intended_interaction.action * BASE_ACTION + _outcome]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, "
              f"Outcome: {_outcome}, Prediction_correct: {self._intended_interaction.outcome == _outcome}, "
              f"Valence: {self._last_interaction.valence})")

        # Compute the next interaction to try to enact.
        # Record the sequences in sequences_df and train the LSTM
        self.learn()

        # Predict the probabilities of the next interactions
        expected_df = self.create_expected_df(self._previous_interaction, self._last_interaction)

        # Add, for each anticipated interaction, the best following action
        self.add_next_intention(self._last_interaction, expected_df)
        print(expected_df)

        # Select the intended interaction
        self._intended_interaction = self.decide(expected_df)

        return self._intended_interaction.action

    def learn(self):
        """Record the last sequence of three interactions and retrain the model.

        Nothing happens until three interactions have been enacted.
        """
        if (self._penultimate_interaction is None
                or self._previous_interaction is None
                or self._last_interaction is None):
            return

        # Record or increment the last sequence
        condition = ((self.sequences_df['i1'] == self._penultimate_interaction.key()) &
                     (self.sequences_df['i2'] == self._previous_interaction.key()) &
                     (self.sequences_df['i3'] == self._last_interaction.key()))
        if self.sequences_df[condition].empty:
            new_sequence = pd.DataFrame({
                'i1': [self._penultimate_interaction.key()],
                'i2': [self._previous_interaction.key()],
                'i3': [self._last_interaction.key()],
                'action': [self._last_interaction.action],
                'valence': [self._last_interaction.valence],
                'count': [1],
                'proclivity': [0]
            })
            self.sequences_df = pd.concat([self.sequences_df, new_sequence], ignore_index=True)
        else:
            # Increment the count (not used for now)
            self.sequences_df.loc[condition, 'count'] += 1

        # Train the network on every recorded sequence: (i1, i2) -> i3
        sequence_keys = self.sequences_df[['i1', 'i2']].values.tolist()
        x = [[self._interactions[s[0]].embedding(), self._interactions[s[1]].embedding()] for s in sequence_keys]
        y = self.sequences_df['i3'].tolist()
        self._model.fit(x, y)

    def create_expected_df(self, previous_interaction, last_interaction):
        """Create the dataframe of expected valence per interaction.

        When either interaction is None the model is not queried and every
        interaction gets a probability of 0.5.

        Returns:
            A dataframe with one row per known interaction and the columns
            interaction, action, outcome, valence, probability, expected_valence.
        """
        if previous_interaction is not None and last_interaction is not None:
            probabilities = self._model.predict([[previous_interaction.embedding(), last_interaction.embedding()]])
        else:
            # Assume both outcomes of every action are equiprobable
            probabilities = [0.5] * len(self._interactions)
        # The dataframe that gives the expected valence of each interaction
        expected_df = pd.DataFrame({
            'interaction': [i.key() for i in self._interactions.values()],
            'action': [i.action for i in self._interactions.values()],
            'outcome': [i.outcome for i in self._interactions.values()],
            'valence': [i.valence for i in self._interactions.values()],
            'probability': probabilities})
        expected_df['expected_valence'] = expected_df['valence'] * expected_df['probability']
        return expected_df

    def add_next_intention(self, last_interaction, expected_df):
        """Add the next best action and its expected valence to ``expected_df`` in place.

        The columns next_action and next_expected_valence are only added when
        ``last_interaction`` is not None.
        """
        if last_interaction is not None:
            expected_df['next_action'] = np.zeros(len(self._interactions), dtype='int')
            expected_df['next_expected_valence'] = pd.Series(dtype='float')
            for row in expected_df.itertuples(index=True):
                # Expectations after last_interaction followed by the anticipated interaction
                expected_df2 = self.create_expected_df(last_interaction, self._interactions[row.interaction])
                expected_df2 = expected_df2.groupby('action').agg({'expected_valence': 'sum'}).reset_index()
                # idxmax() returns an index label, so it is used with .loc rather than .iloc
                best = expected_df2['expected_valence'].idxmax()
                # Store the best following action and its expected valence
                expected_df.loc[row.Index, 'next_action'] = expected_df2.loc[best, 'action']
                expected_df.loc[row.Index, 'next_expected_valence'] = expected_df2.loc[best, 'expected_valence']

    def decide(self, expected_df):
        """Decide the intended interaction based on the dataframe of expected valences.

        If even the best action has a negative expected valence, and the
        anticipation columns are present, the first interaction that is both
        likely (probability > 0.8) and followed by a high expected valence
        (next_expected_valence > 0.8) is intended. Otherwise the most probable
        interaction of the action with the highest expected valence is intended.
        """
        # Aggregate by action by summing the expected valence
        action_expectation_df = expected_df.groupby('action').agg({'expected_valence': 'sum'}).reset_index()
        # Sort the actions by decreasing expected valence
        action_expectation_df = action_expectation_df.sort_values(by=['expected_valence'], ascending=[False]).reset_index(drop=True)
        print(action_expectation_df)

        # If the greatest expected valence is negative, look one step further.
        # The anticipation column only exists if add_next_intention added it.
        if (action_expectation_df.loc[0, 'expected_valence'] < 0
                and 'next_expected_valence' in expected_df.columns):
            # Look for an interaction that leads to a high next expected valence
            next_expected_df = expected_df[(expected_df['probability'] > 0.8) & (expected_df['next_expected_valence'] > 0.8)]
            if not next_expected_df.empty:
                intended_interaction = next_expected_df['interaction'].iloc[0]
                print(f"Intend interaction {intended_interaction} because anticipation")
                return self._interactions[intended_interaction]

        # Select the action that has the highest expected valence
        intended_action = action_expectation_df.loc[0, 'action']
        # Find the most probable interaction for the selected action
        outcome_df = expected_df[expected_df['action'] == intended_action]
        most_probable = outcome_df['probability'].idxmax()
        # The 'interaction' column already holds the dictionary key of that row
        return self._interactions[outcome_df.loc[most_probable, 'interaction']]


# Testons l'agent dans Environment6

In [700]:
# Seed PyTorch before creating the agent: Agent.__init__ builds the LSTM,
# whose weight initialization draws random numbers
torch.manual_seed(0)

# The four possible interactions as (action, outcome, valence)
interactions = [
    Interaction(ACTION0,OUTCOME0,-1),
    Interaction(ACTION0,OUTCOME1,1),
    Interaction(ACTION1,OUTCOME0,-1),
    Interaction(ACTION1,OUTCOME1,1),
]

a = Agent(interactions)
e = Environment6()

# Output widget for displaying the plot (read as a global by Environment6.display)
out = Output()

# Initial state of the interaction loop: step counter and first outcome given to the agent
step = 0
outcome = 0

In [795]:
# One agent/environment cycle; presumably this cell is re-executed to advance step by step
print(f"Step {step}")
step += 1
# Draw the current state of the grid into the output widget, then show the widget
e.display()
display(out)
# The agent receives the outcome of its previous action and chooses the next action
action = a.action(outcome)
# The environment enacts the action and returns the resulting outcome
outcome = e.outcome(action)
# a.sequences_df

Step 94


Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<Figure size 640x480 with 1 Axes>', 'i…

Action: 1, Prediction: 1, Outcome: 1, Prediction_correct: True, Valence: 1)
acc : 0.556 = 10/18 for epoch 19
   interaction  action  outcome  valence  probability  expected_valence  \
0            0       0        0       -1     0.999924         -0.999924   
1            1       0        1        1     0.000076          0.000076   
2            2       1        0       -1     0.999938         -0.999938   
3            3       1        1        1     0.000062          0.000062   

   next_action  next_expected_valence  
0            0               0.998556  
1            0               0.960102  
2            0              -0.970976  
3            0              -0.998560  
   action  expected_valence
0       0         -0.999848
1       1         -0.999875


À partir du Step 32, le mécanisme de décision se base sur la next_expected_valence.