# Architecture draft 

Formalisation du jeu et du narratif. On se place entre un joueur et un agent. On a N **etats**. Le jeu se joue au tour par tour, chaque **action** met à jour la probabilité de changer d'état, sachant l'état dans lequel on se trouve. On stocke une séquence d'actions et le jeu se termine si on prend deux actions consécutives dans le même état. L'objet mis à jour est la **matrice de transition** qui définit les passages entre les états. 

Dans une version ultérieure, on peut avoir un méta-agent qui prend tous les états de tout le monde et les synthétise comme état du monde.

In [1]:
import numpy as np

In [14]:
import numpy as np

def generate_state_matrix(n, init_type='uniform', random_seed=None):
    """
    Generates a state (or transition) matrix.

    Parameters:
    - n (int): Number of states (size of the matrix will be n x n).
    - init_type (str): Type of initialization:
        - 'zeros': All transitions start at 0.
        - 'random': Random values between 0 and 1.
        - 'uniform': Uniform probabilities (all rows sum to 1).
    - random_seed (int, optional): Seed for reproducible random matrices.

    Returns:
    - np.ndarray: The generated state matrix.
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    if init_type == 'zeros':
        # All values initialized to 0
        matrix = np.zeros((n, n))
    elif init_type == 'random':
        # Random values between 0 and 1
        matrix = np.random.rand(n, n)
    elif init_type == 'uniform':
        # Uniform probabilities for each row
        matrix = np.random.rand(n, n)
        matrix = matrix / matrix.sum(axis=1, keepdims=True)  # Normalize rows to sum to 1
    else:
        raise ValueError(f"Invalid init_type '{init_type}'. Use 'zeros', 'random', or 'uniform'.")
    
    return matrix

def update_transition_matrix(transition_matrix, current_state_index, action_scores):
    """
    Updates the transition matrix by adding action scores to the current state's row.
    Optionally normalizes the row to maintain valid transition probabilities.

    Parameters:
    - transition_matrix (np.ndarray): The transition matrix (n x n).
    - current_state_index (int): The index of the current state (row).
    - action_scores (np.ndarray): The action scores for all states (n, values can be unnormalized).

    Returns:
    - np.ndarray: The updated transition matrix.
    """
    # Ensure the matrices and vectors have the correct shape
    if action_scores.shape[0] != transition_matrix.shape[1]:
        raise ValueError("Action scores must have the same length as the number of states.")
    
    # Add the action scores to the current state row
    transition_matrix[current_state_index] += action_scores

    # Optional: Normalize the row so that it sums to 1 (if the values represent probabilities)
    row_sum = transition_matrix[current_state_index].sum()
    if row_sum != 0:
        transition_matrix[current_state_index] /= row_sum  # Normalize the row

    return transition_matrix

class Architecture():
    def __init__(self,
                 n_states,
                 initial_state,
                 final_state,
                 random_seed=42
                 ):
        """
        n_states the number of states to consider
        initial_state: the index of the initial state in which 
                       the game starts
        final state: the index of the state in which the game ends if the 
                    player is in this state for two indices in a row
        """

        self.n_states=n_states
        self.initial_state=initial_state

        self.final_state=final_state # is the state in which the action ends.
        self.end=False # will be the end condition 

        # initialize a list that keep track of the states
        # of the agent
        self.states_list=[initial_state]

        # create a transition matrix between the states
        # initially set as completely random
        self.transition_matrix=generate_state_matrix(
            n_states,random_seed=random_seed
        )

    def update_transition_matrix(self,
                            action):
        """
        
        given the action which is the consequence
        of the interaction between the agent and the 
        player, updates the transition matrix
        """

        current_state=self.states_list[-1]
        updated_transition_matrix=update_transition_matrix(self.transition_matrix, 
                                                           current_state, 
                                                           action)
        
        self.transition_matrix=updated_transition_matrix


    def update_state(self):
        """
        updates the state of the game given the updated 
        transition matrix. 

        done by sampling given the updated
        probability vector.
        """

        current_state=self.states_list[-1]

        self.states_list.append(
            np.random.choice(self.n_states, p=self.transition_matrix[current_state])
        )

        print('The former state was {}. The new state is {}.'.format(
            self.states_list[-1], self.states_list[-1]
        ))

    def terminate(self):
        self.end=(self.states_list[-1]==self.final_state) & (self.states_list[-2]==self.final_state)

    def step(self,action):

        # given the action update the transition matrix
        self.update_transition_matrix(action)

        # and update the state
        self.update_state()

        # update the final condition
        self.terminate()

In [27]:
n_states=3
final_state=0
initial_state=2

test=Architecture(n_states,initial_state,final_state)
print('Initial state is {}'.format(test.initial_state))

print(test.transition_matrix)

action=np.array([2,0,0]) 

for _ in range(10):

    test.step(action)
    print(test.end)



Initial state is 2
[[0.18205878 0.46212909 0.35581214]
 [0.65738127 0.17132261 0.17129612]
 [0.03807826 0.56784481 0.39407693]]
The former state was 1. The new state is 1.
False
The former state was 0. The new state is 0.
False
The former state was 2. The new state is 2.
False
The former state was 0. The new state is 0.
False
The former state was 0. The new state is 0.
True
The former state was 0. The new state is 0.
True
The former state was 0. The new state is 0.
True
The former state was 0. The new state is 0.
True
The former state was 0. The new state is 0.
True
The former state was 0. The new state is 0.
True
