# Active Blockference Single Agent Revisited: Best Practices for Modular Blockference Development

This notebook explores an alternative way of structuring the POMDP with a more granular approach.

In [None]:
from environment import Grid

The `Agent` class holds information about the generative model, i.e. its $A$, $B$, $C$, $D$ and $E$ components.

The `Grid` class represents the environment, in this case a grid-world. 

The `environment` module will gradually have more environments that can be used in simulations.

In [None]:
# Create the grid-world environment, then the agent that is constructed from it.
# NOTE(review): only `Grid` is imported in the visible cells; `Agent` must be
# imported as well for this cell to run — confirm which module provides it.
env = Grid()
agent = Agent(env)

The `Agent` takes `env` as input to get information about the number of observations and the dynamics of the environment, as well as whether the environment is globally observable (i.e. $A$ and $B$ stay fixed) or only locally observable (i.e. $A$ and $B$ will be updated).

In [1]:
# NOTE(review): this cell relies on `env` and `agent` from the cell that creates
# them; running it first raises NameError, so execute the cells in order.
env.update(agent) # takes either a single agent or a list of agents; initializes the environment

NameError: name 'env' is not defined

In [None]:
# Initial simulation state: holds the agent and environment objects.
# NOTE(review): the policy functions in this notebook read the keys
# 'env_state', 'prior', 'prior_A', 'prior_B' and 'prior_C' from the state,
# none of which are set here — confirm where those are initialised.
initial_state = {
    'agent': agent,
    'env': env
}

In [None]:
# Parameter sweep definition: each key names a parameter and maps to the list
# of values to sweep over (all lists are still empty placeholders).
# Keys are string literals; bare identifiers would be evaluated as variables
# that are not defined anywhere and raise NameError.
params = {
    'initial_location': [],  # run the simulation with a different initial location each time
    'preferred_location': [],  # run the simulation with a different preferred location each time
    'barrier_location': [],  # adding random barriers in the environment for the agent to avoid
    'policy_depth': [],  # sweep over different planning depths
}

### Policy functions

In [None]:
def p_planning(params, substep, state_history, previous_state):
    """Construct the set of policies and emit it as a policy signal.

    The four arguments follow the signature shared by every policy function
    in this notebook; none of them is read here. The policy set is built
    from `act.n_states`, `act.E` and `act.policy_len`, where `act` and
    `construct_policies` come from the enclosing scope (neither is defined
    in the visible cells).

    Returns:
        dict: ``{'update_policies': policies}``.
    """
    policies = construct_policies([act.n_states], [len(act.E)], policy_len=act.policy_len)
    # The signal must be wrapped in a dict: a bare `key: value` pair after
    # `return` is not valid Python.
    return {'update_policies': policies}


def p_actinf(params, substep, state_history, previous_state):
    """Run one inference/action-selection step and emit the resulting signals.

    Looks up the index of the current environment state in `grid`, infers the
    current states from it, scores the agent's policies, samples an action
    and projects the inferred states through the slice of ``prior_B``
    selected by that action to obtain the next prior.

    `params`, `substep` and `state_history` are not read. The function relies
    on `grid`, `u`, `agent` and `act` from the enclosing scope.

    Returns:
        dict: the signals 'update_prior', 'update_action' and
        'update_inference'.
    """
    # observation index = position of the current location within `grid`
    observation = grid.index(previous_state['env_state'])

    # state inference from the observation and the current prior
    A = previous_state['prior_A']
    inferred_states = u.infer_states(observation, A, previous_state['prior'])

    # EFE (G) of every policy held by the agent
    candidate_policies = agent.policies
    B = previous_state['prior_B']
    efe = u.calculate_G_policies(
        A,
        B,
        previous_state['prior_C'],
        inferred_states,
        policies=candidate_policies,
    )

    # action posterior: softmax over the negated EFE
    policy_posterior = u.softmax(-efe)

    # probability of each action, then draw one
    action_probabilities = u.compute_prob_actions(act.E, candidate_policies, policy_posterior)
    action = u.sample(action_probabilities)

    # next prior: the sampled action's slice of B applied to the inferred states
    next_prior = B[:, :, action].dot(inferred_states)

    signals = {
        'update_prior': next_prior,
        'update_action': action,
        'update_inference': inferred_states,
    }
    return signals

def p_env(params, substep, state_history, previous_state):
    """Move the grid location according to the chosen action.

    Reads the current ``(Y, X)`` location from ``previous_state['env_state']``
    and applies one of five actions: 0 = UP, 1 = DOWN, 2 = LEFT, 3 = RIGHT,
    4 = STAY. UP and LEFT only move while the coordinate is greater than 0;
    DOWN and RIGHT only move while it is less than ``act.border``. In every
    other case (including STAY and unrecognised actions) the location is
    returned unchanged.

    NOTE(review): `chosen_action` and `act` are not defined in this function
    or in the visible cells. `chosen_action` presumably has to be read from
    `previous_state` — confirm the key and wire it in.

    Returns:
        dict: ``{'update_env': (Y_new, X_new)}``.
    """
    Y, X = previous_state['env_state']
    # Default covers STAY (4), blocked moves and unrecognised actions.
    Y_new, X_new = Y, X

    if chosen_action == 0:  # UP
        if Y > 0:
            Y_new = Y - 1
    elif chosen_action == 1:  # DOWN
        if Y < act.border:
            Y_new = Y + 1
    elif chosen_action == 2:  # LEFT
        if X > 0:
            X_new = X - 1
    elif chosen_action == 3:  # RIGHT
        if X < act.border:
            X_new = X + 1

    current_state = (Y_new, X_new)  # store the new grid location
    # The signal must be wrapped in a dict: a bare `key: value` pair after
    # `return` is not valid Python.
    return {'update_env': current_state}


## State Update Functions