[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PetiteIA/schema_mechanism/blob/master/notebooks/agent5-DNN.ipynb)

# UTILISATION D'UNE LSTM POUR GENERER UNE SEQUENCE DE DEUX INTERACTIONS

Ce notebook présente notre troisième agent doté d'un LSTM. 
Nous définissons l'embedding des interactions dans la classe Interaction : une dimension pour l'action et une dimension pour l'outcome.


# La classe Interaction

On définit l'embedding de chaque interaction.

In [1]:
# Radix that multiplies the action in Interaction.key(); outcomes must stay
# below this value for the keys to be unique.
BASE_ACTION = 2


class Interaction:
    """An interaction is a tuple (action, outcome) with a valence."""

    def __init__(self, action, outcome, valence):
        """Store the action code, the outcome code and the valence of the interaction."""
        self.action = action
        self.outcome = outcome
        self.valence = valence

    def key(self):
        """Return the integer key used to find this interaction in the dictionary.

        The key is ``action * BASE_ACTION + outcome``.
        """
        return self.action * BASE_ACTION + self.outcome

    def __str__(self):
        """Return the interaction in the form '<action><outcome>:<valence>' for debug."""
        return f"{self.action}{self.outcome}:{self.valence}"

    def __eq__(self, other):
        """Interactions are equal if they have the same key (the valence is ignored)."""
        if not isinstance(other, Interaction):
            # Let Python fall back to its default handling instead of raising
            # AttributeError on objects that have no key() method
            return NotImplemented
        return self.key() == other.key()

    def __hash__(self):
        """Hash on the key so that equal interactions hash alike.

        Defining __eq__ alone would make instances unhashable.
        """
        return hash(self.key())

    def embedding(self):
        """Return the list [action, outcome] used as the embedding of this interaction."""
        return [self.action, self.outcome]

# L'environnement SmallLoop

L'agent a six actions possibles : avancer, sentir devant, sentir à gauche, sentir à droite, tourner à gauche ou tourner à droite.

L'environnement renvoie 1 la première fois que l'agent se cogne dans le mur qui est vert clair, et le mur devient vert foncé. 
Tant que le mur est vert foncé, l'environnement renvoie 0 jusqu'à ce que l'agent s'éloigne et que le mur redevienne vert clair. 

In [55]:
# Directory in which SmallLoop.save() writes its PNG frames
save_dir = "sav"

# Action codes, numbered consecutively from 0
(
    FORWARD,
    FEEL_FRONT,
    FEEL_LEFT,
    FEEL_RIGHT,
    TURN_LEFT,
    TURN_RIGHT,
) = range(6)


In [56]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
from ipywidgets import Button, HBox,VBox, Output
from IPython.display import display

# Heading codes; each one indexes a row of SmallLoop.directions
LEFT, DOWN, RIGHT, UP = range(4)

# Codes painted on the displayed maze by the feel and bump actions
FEELING_EMPTY, FEELING_WALL, BUMPING = range(2, 5)

# One colour per cell code 0..4. The first palette is immediately
# overridden by the second one.
colors = [
    "#b0b0b0",
    '#b0b0b0',
    '#ffffff',
    '#535865',
    "#F93943",
]  # Hidden environment
colors = [
    "#D6D6D6",
    '#5C946E',
    '#FAE2DB',
    '#535865',
    "#F93943",
]
# Colour of the marker that represents the agent
agent_color = "#1976D2"

class SmallLoop():
    """Small maze in which the agent moves forward, turns, and feels adjacent cells.

    `grid` is the fixed layout (0 = free cell, any other value = wall). `maze` is
    the copy that gets drawn: feel and bump marks are painted on it until `clear`
    restores the layout.
    """

    def __init__(self, position, direction):
        """Place the agent at `position` ([row, column]) heading in `direction` (LEFT, DOWN, RIGHT or UP)."""
        self.grid = np.array([
            [1, 1, 1, 1, 1, 1],
            [1, 0, 0, 0, 1, 1],
            [1, 0, 1, 0, 0, 1],
            [1, 0, 1, 1, 0, 1],
            [1, 0, 0, 0, 0, 1],
            [1, 1, 1, 1, 1, 1]
        ])
        self.maze = self.grid.copy()
        self.position = np.array(position)
        self.direction = direction
        self.cmap = ListedColormap(colors)
        # One colour bin per cell code 0..4
        self.norm = BoundaryNorm([-0.5, 0.5, 1.5, 2.5, 3.5, 4.5], self.cmap.N)
        self.marker_size = 400
        self.marker_map = {LEFT: '<', DOWN: 'v', RIGHT: '>', UP: '^'}
        self.marker_color = agent_color
        # (row, column) offset of one step in each heading, indexed by heading code
        self.directions = np.array([
            [0, -1],  # Left
            [1, 0],   # Down
            [0, 1],   # Right
            [-1, 0]   # Up
            ])

    def _feel(self, heading_offset):
        """Mark the cell adjacent in heading `direction + heading_offset`; return 1 if it is a wall, else 0."""
        # The modulo wraps the heading around the four rows of self.directions
        feeling_position = self.position + self.directions[(self.direction + heading_offset) % 4]
        if self.grid[tuple(feeling_position)] == 0:
            self.maze[tuple(feeling_position)] = FEELING_EMPTY
            return 0
        self.maze[tuple(feeling_position)] = FEELING_WALL
        return 1

    def outcome(self, action):
        """Update the grid. Return the outcome of the action.

        The outcome is 1 when FORWARD bumps into a wall or when a FEEL_* action
        touches a wall, and 0 otherwise. Turning always returns 0, and an action
        code that is not recognized leaves the environment unchanged and returns 0.
        """
        result = 0

        if action == FORWARD:
            target_position = self.position + self.directions[self.direction]
            if self.grid[tuple(target_position)] == 0:
                self.position[:] = target_position
            else:
                result = 1
                self.maze[tuple(target_position)] = BUMPING

        elif action == TURN_RIGHT:
            self.direction = {LEFT: UP, DOWN: LEFT, RIGHT: DOWN, UP: RIGHT}[self.direction]

        elif action == TURN_LEFT:
            self.direction = {LEFT: DOWN, DOWN: RIGHT, RIGHT: UP, UP: LEFT}[self.direction]

        elif action == FEEL_FRONT:
            result = self._feel(0)

        elif action == FEEL_LEFT:
            # Heading codes increase when turning left
            result = self._feel(1)

        elif action == FEEL_RIGHT:
            result = self._feel(-1)

        print(f"Line: {self.position[0]}, Column: {self.position[1]}, direction: {self.direction}")
        return result

    def display(self):
        """Display the grid in the notebook.

        Reads the module-level `out` widget and `step` counter, which must exist
        before this method is called.
        """
        out.clear_output(wait=True)
        with out:
            fig, ax = plt.subplots()
            ax.imshow(self.maze, cmap=self.cmap, norm=self.norm)
            # imshow puts columns on the x axis and rows on the y axis
            plt.scatter(self.position[1], self.position[0], s=self.marker_size, marker=self.marker_map[self.direction], c=self.marker_color)
            ax.text(4.5, 0, f"{step:>3}", fontsize=12, color='White')
            plt.show()

    def save(self, step):
        """Save the display as a PNG file named after `step` in the `save_dir` directory."""
        # Local import: os is not imported at the top of the notebook
        import os
        # savefig fails when the target directory does not exist
        os.makedirs(save_dir, exist_ok=True)
        fig, ax = plt.subplots()
        ax.set_xticks([])
        ax.set_yticks([])
        ax.axis('off')
        ax.imshow(self.maze, cmap=self.cmap, norm=self.norm)
        plt.scatter(self.position[1], self.position[0], s=self.marker_size, marker=self.marker_map[self.direction], c=self.marker_color)
        ax.text(4.5, 0, f"{step:>4}", fontsize=12, color='White')
        plt.savefig(f"{save_dir}/{step:04}.png", bbox_inches='tight', pad_inches=0, transparent=True)
        # Close the figure so that saved frames do not accumulate in memory
        plt.close(fig)

    def clear(self, clear):
        """Erase the feel and bump marks from the displayed maze when `clear` is truthy."""
        if clear:
            self.maze[:, :] = self.grid


# AGENT LSTM

Implémentons l'Agent3 qui va prédire la probabilité des prochains tokens d'une séquence

## Créons le modèle de LSTM

Le modèle a deux entrées: previous_interaction, last_interaction. 

In [215]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    """LSTM followed by a linear layer that scores `len_vocab` classes from a sequence of 2-d embeddings.

    The model owns its optimizer and loss function, so `fit` and `predict` are
    the only methods a caller needs.
    """

    def __init__(self):
        """Build the layers, the optimizer and the loss, then initialize the weights."""
        super().__init__()
        self.len_vocab = 12
        self.num_layers = 1
        self.hidden_size = 64

        # The inputs are already 2-d vectors, so no nn.Embedding layer is used
        embedding_dim = 2

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)

        # Define the output fully connected layer
        self.fc_out = nn.Linear(self.hidden_size, self.len_vocab)

        self._optimizer = torch.optim.Adam(self.parameters(), lr=0.001, weight_decay=0.0001)
        self._loss_func = nn.CrossEntropyLoss()

        # Manual initialization of the LSTM weights and biases
        for name, param in self.lstm.named_parameters():
            if 'weight_ih' in name:
                nn.init.xavier_uniform_(param)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0.0)
        # Constant initialization of the fully connected layer: every class
        # starts with the same logit
        nn.init.constant_(self.fc_out.weight, 0.5)
        nn.init.constant_(self.fc_out.bias, 0.1)

    def forward(self, input_seq, hidden_in, mem_in):
        """Run the LSTM on `input_seq` and return (logits, hidden state, cell state).

        The logits have one row per time step; callers use the last step.
        """
        # The LSTM layer expects floating-point inputs
        input_embs = input_seq.type(torch.float32)

        # Pass the embeddings through the LSTM layer
        output, (hidden_out, mem_out) = self.lstm(input_embs, (hidden_in, mem_in))

        # Pass the LSTM output through the fully connected layer to get the final output
        return self.fc_out(output), hidden_out, mem_out

    def fit(self, inputs, targets, epochs=20):
        """Train on the whole dataset as a single batch for `epochs` optimizer steps.

        inputs: nested list of embeddings, one sequence per sample.
        targets: list of class indices, one per sample.
        epochs: number of full-batch gradient steps (default 20).

        Prints the accuracy of the last epoch. Does nothing when there is no
        sample or when `epochs` is not positive.
        """
        # Nothing to learn from: an empty batch cannot go through the LSTM
        if len(inputs) == 0 or epochs <= 0:
            return

        input_tensor = torch.tensor(inputs)
        labels = torch.tensor(targets)
        batch_size = input_tensor.shape[0]

        # Set model to training mode
        self.train()

        for epoch in range(epochs):
            # Start every epoch from zeroed hidden and memory states
            hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=input_tensor.device)
            memory = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=input_tensor.device)

            # Forward pass through the model
            pred, hidden, memory = self(input_tensor, hidden, memory)

            # Only the prediction at the last time step is compared with the label
            last_step = pred[:, -1, :]
            loss = self._loss_func(last_step, labels)

            # Backpropagation and optimization
            self._optimizer.zero_grad()
            loss.backward()
            self._optimizer.step()

            # Number of correct predictions of this epoch
            train_acc = (last_step.argmax(1) == labels).sum()
        print(f"acc : {train_acc/len(labels):.3f} = {train_acc}/{len(labels)} for epoch {epoch}")

    def predict(self, sequence):
        """Return a flat list of `len_vocab` probabilities for the first sequence of the batch.

        The last-step logits are grouped in consecutive pairs and a softmax is
        applied within each pair, so each pair of values sums to 1.
        """
        sequence = torch.tensor(sequence, dtype=torch.int)

        h = torch.zeros(self.num_layers, sequence.shape[0], self.hidden_size, device=sequence.device)
        cell = torch.zeros(self.num_layers, sequence.shape[0], self.hidden_size, device=sequence.device)

        # Inference mode; fit() switches back to training mode
        self.eval()
        with torch.no_grad():  # No gradient computation in prediction mode
            logits, _, _ = self(sequence, h, cell)
        # Softmax within each consecutive pair of logits
        pairwise_logits = logits[0, -1, :].reshape(-1, 2)
        probabilities = nn.functional.softmax(pairwise_logits, dim=1).flatten().tolist()
        return probabilities
    

# Définissons l'agent

In [442]:
import torch.optim as optim
import pandas as pd

class Agent:
    """Agent that records sequences of three interactions, trains an LSTM on them and selects its next action."""
    def __init__(self, _interactions):
        """Initialize the model, the dictionary of interactions and the sequence memory.

        _interactions: iterable of Interaction objects. They are stored in a dict
        indexed by their key() and sorted by increasing key.
        """
        # Initialize the neural network
        self._model = LSTM()
        
        self._interactions = dict(sorted({interaction.key(): interaction for interaction in _interactions}.items()))
        # The first intended interaction is the one with the lowest key
        self._intended_interaction = list(self._interactions.values())[0]
        # The three most recent enacted interactions, most recent first
        self._last_interaction = None
        self._previous_interaction = None
        self._penultimate_interaction = None
        # The dataframe that memorizes the sequences of interactions (i1, i2, i3 are interaction keys)
        self.sequences_df = pd.DataFrame({
            'i1': pd.Series(dtype='int'),
            'i2': pd.Series(dtype='int'),
            'i3': pd.Series(dtype='int'),
            'action': pd.Series(dtype='int'),
            'valence': pd.Series(dtype='int'),
            'count': pd.Series(dtype='int'),
            'proclivity': pd.Series(dtype='int'),
        })
        # Filled by action() with the result of create_expected_df()
        self.expected_df = None
    
    def action(self, _outcome):
        """Record the outcome of the previous action, learn from it and return the next action.

        _outcome: outcome code returned by the environment for the previously intended action.
        """
        # Shift the history of enacted interactions by one cycle
        self._penultimate_interaction = self._previous_interaction 
        self._previous_interaction = self._last_interaction
        # The enacted interaction combines the intended action with the actual outcome
        self._last_interaction = self._interactions[self._intended_interaction.action * BASE_ACTION + _outcome ]
        print(f"Action: {self._intended_interaction.action}, Prediction: {self._intended_interaction.outcome}, "
              f"Outcome: {_outcome}, Prediction_correct: {self._intended_interaction.outcome == _outcome}, "
              f"Valence: {self._last_interaction.valence})")

        """ Computing the next interaction to try to enact """
        # Record the sequences in sequences_df and train the LSTM
        self.learn()       

        # Predict the probabilities of the next interactions
        self.expected_df = self.create_expected_df(self._previous_interaction, self._last_interaction)

        # Select the intended interaction
        self._intended_interaction = self.decide()        

        # Return the action
        return self._intended_interaction.action

    def learn(self):
        """Record the last sequence of three interactions and train the model on all recorded sequences.

        Does nothing until three interactions have been enacted.
        """
        if self._previous_interaction is not None and self._last_interaction is not None and self._penultimate_interaction is not None:
            # Record or increment the last sequence
            condition = ((self.sequences_df['i1'] == self._penultimate_interaction.key()) & 
                        (self.sequences_df['i2'] == self._previous_interaction.key()) & 
                        (self.sequences_df['i3'] == self._last_interaction.key()))
            if self.sequences_df[condition].empty:
                new_sequence = pd.DataFrame({
                    'i1': [self._penultimate_interaction.key()], 
                    'i2': [self._previous_interaction.key()], 
                    'i3': [self._last_interaction.key()], 
                    'action': [self._last_interaction.action], 
                    'valence': [self._last_interaction.valence],
                    'count': [1], 
                    'proclivity': [0]
                })
                self.sequences_df = pd.concat([self.sequences_df, new_sequence], ignore_index=True)
            else:
                # Increment the count (not used for now)
                self.sequences_df.loc[condition, 'count'] += 1
            # Train the neural network on the sequences recorded in sequences_df:
            # the embeddings of (i1, i2) are the input and the key of i3 is the label
            sequence_keys = self.sequences_df[['i1', 'i2']].values.tolist()
            x = [[self._interactions[s[0]].embedding(), self._interactions[s[1]].embedding()] for s in sequence_keys]
            y = self.sequences_df['i3'].tolist()
            self._model.fit(x, y)

    def create_expected_df(self, previous_interaction, last_interaction):
        """Create the dataframe of expected valence per interaction.

        Returns one row per known interaction with its probability, the number of
        times it was recorded after (previous_interaction, last_interaction), and
        expected_valence = valence * probability.
        """
        if previous_interaction is not None and last_interaction is not None:
            # Predict the probabilities
            # NOTE(review): assumes the model returns one probability per interaction, in key order - confirm
            probabilities = self._model.predict([[previous_interaction.embedding(), last_interaction.embedding()]])       
            # Count the activations: recorded sequences that start with the two given interactions
            activated_df = self.sequences_df[(self.sequences_df['i1'] == previous_interaction.key()) & (self.sequences_df['i2'] == last_interaction.key())]
            # Left merge keeps one row per interaction key; interactions never recorded get a count of 0
            count_df = pd.merge(pd.DataFrame({'i3': [i for i in self._interactions]}), activated_df, on='i3', how='left')
            count = count_df['count'].fillna(0).astype(int)
        else: 
            probabilities = [0.5] * len(self._interactions) # Assume all interactions are equiprobable
            count = np.zeros(len(self._interactions), dtype='int')
        # The dataframe that gives the expected valence of each interaction
        expected_df = pd.DataFrame({
            'interaction': [i for i in self._interactions],
            'action': [i.action for i in self._interactions.values()],
            'outcome': [i.outcome for i in self._interactions.values()],
            'valence': [i.valence for i in self._interactions.values()],
            'probability': probabilities, 
            'count': count})
        expected_df['expected_valence'] = expected_df['valence'] * expected_df['probability']
        return expected_df
    
    def add_next_intention(self, last_interaction, expected_df):
        """Add the next best action and its expected valence to each row of expected_df.

        Modifies expected_df in place by adding the columns 'next_action' and
        'next_expected_valence'. Does nothing when last_interaction is None.
        """
        if last_interaction is not None:
            expected_df['next_action'] =  np.zeros(len(self._interactions), dtype='int')
            expected_df['next_expected_valence'] = pd.Series(dtype='float')
            for row in expected_df.itertuples(index=True):
                # Create an expected_df2 based on last_interaction and on each anticipated interaction
                expected_df2 = self.create_expected_df(last_interaction, self._interactions[row.interaction])
                expected_df2 = expected_df2.groupby('action').agg({'expected_valence': 'sum'}).reset_index()
                # Add the best next action and its expected valence to expected_df
                # (after reset_index the label returned by idxmax is also a valid position for iloc)
                idxmax = expected_df2['expected_valence'].idxmax()
                expected_df.loc[row.Index, 'next_action'] = expected_df2['action'].iloc[idxmax]
                expected_df.loc[row.Index, 'next_expected_valence'] = expected_df2['expected_valence'].iloc[idxmax]

    
    def decide(self):
        """Decide the intended interaction based on the dataframe of expected valences."""
        # Aggregate per action by summing the expected valence and the count
        action_expectation_df = self.expected_df.groupby('action').agg({'expected_valence': 'sum', 'count': 'sum'}).reset_index()
        # Compute the proclivity and sort the actions by decreasing proclivity
        action_expectation_df['proclivity'] = action_expectation_df['expected_valence'] * action_expectation_df['count']
        action_expectation_df = action_expectation_df.sort_values(by=['proclivity'], ascending=[False]).reset_index(drop=True)
        print(action_expectation_df)

        # If the expected valence of the action with the highest proclivity is negative
        # NOTE(review): the original comment on the next line read "Note used" - meaning unclear, confirm
        if action_expectation_df.loc[0, 'expected_valence'] < 0:
            self.add_next_intention(self._last_interaction, self.expected_df)
            # Look for a probable interaction that leads to a high next expected valence
            next_expected_df = self.expected_df[(self.expected_df['probability'] > 0.8) & (self.expected_df['next_expected_valence'] > 0.8)]
            if not next_expected_df.empty:
                intended_interaction = next_expected_df['interaction'].iloc[0]
                print(f"Intend interaction {intended_interaction} because anticipation")
                return self._interactions[intended_interaction]
        
        # Select the action that has the highest proclivity
        intended_action = action_expectation_df.loc[0, 'action']
        # Find the most probable outcome for the selected action
        outcome_df = self.expected_df[self.expected_df['action'] == intended_action]
        intended_outcome = outcome_df.loc[outcome_df['probability'].idxmax(), 'outcome']
        # Build the intended interaction
        return self._interactions[intended_action * BASE_ACTION + intended_outcome]


# Testons l'agent dans le Small Loop

In [443]:
# Seed PyTorch before the agent creates its LSTM so that the weight initialization is reproducible
torch.manual_seed(0)

# Instantiate the small loop environment: agent at row 1, column 1, heading 0 (LEFT)
e = SmallLoop([1, 1], 0)

# Instantiate the agent with its interactions, each given as (action, outcome, valence)
interactions = [
    Interaction(FORWARD,0,5),
    Interaction(FORWARD,1,-10),
    Interaction(TURN_LEFT,0,-3),
    Interaction(TURN_LEFT,1,-3),
    Interaction(TURN_RIGHT,0,-3),
    Interaction(TURN_RIGHT,1,-3),
    Interaction(FEEL_FRONT,0,-1),
    Interaction(FEEL_FRONT,1,-1),
    Interaction(FEEL_LEFT,0,-1),
    Interaction(FEEL_LEFT,1,-1),
    Interaction(FEEL_RIGHT,0,-1),
    Interaction(FEEL_RIGHT,1,-1)
]
a = Agent(interactions)

# Initialize the interaction loop
step = 0
outcome = 0

# Display: e.display() draws into the `out` widget and reads `step`, so both must exist first
out = Output()
e.display()
display(out)

Output()

In [622]:
# One cycle of the interaction loop (presumably this cell is re-run for each step)
print(f"Step: {step}")
step += 1
# The agent learns from the previous outcome and chooses its next action
action = a.action(outcome)
e.display()
e.save(step)  # Save the image file 
# Erase the feel and bump marks before the environment enacts the new action
e.clear(True)  
outcome = e.outcome(action)
# Alternative cell output: a.sequences_df
# The last expression of the cell is shown as the cell output
a.expected_df

Step: 178
Action: 1, Prediction: 0, Outcome: 1, Prediction_correct: False, Valence: -1)
acc : 0.397 = 54/136 for epoch 49
   action  expected_valence  count  proclivity
0       1         -1.000000      0   -0.000000
1       2         -1.000000      0   -0.000000
2       4         -3.000000      0   -0.000000
3       3         -1.000000      0   -0.000000
4       5         -3.000000      0   -0.000000
5       0         -9.999735      1   -9.999735
Line: 3, Column: 4, direction: 2


Unnamed: 0,interaction,action,outcome,valence,probability,count,expected_valence,next_action,next_expected_valence
0,0,0,0,5,1.8e-05,0,8.8e-05,1,-1.0
1,1,0,1,-10,0.999982,1,-9.999824,1,-1.0
2,2,1,0,-1,0.042569,0,-0.042569,1,-1.0
3,3,1,1,-1,0.957431,0,-0.957431,3,-1.0
4,4,2,0,-1,0.086089,0,-0.086089,1,-1.0
5,5,2,1,-1,0.913911,0,-0.913911,3,-1.0
6,6,3,0,-1,0.987054,0,-0.987054,3,-1.0
7,7,3,1,-1,0.012946,0,-0.012946,2,-1.0
8,8,4,0,-3,0.999105,0,-2.997314,1,-1.0
9,9,4,1,-3,0.000895,0,-0.002686,3,-1.0
