### Imports

First, import `pymdp` and the modules we'll need.

In [62]:
!pip install seaborn




You should consider upgrading via the 'c:\users\mao\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


In [139]:
# automatically reimport modified external python files 
%load_ext autoreload 
%autoreload 2 # automatically format running cell with black 


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [140]:
import os
import sys
import pathlib
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Append the parent of the current working directory to sys.path.
# NOTE(review): presumably this makes a local `pymdp` checkout one level above the
# notebook importable by the imports that follow — confirm against the repo layout.
path = pathlib.Path(os.getcwd())
module_path = str(path.parent) + '/'
sys.path.append(module_path)

from pymdp.agent import Agent
from pymdp.core import utils
from pymdp.core.maths import softmax
from pymdp.core.utils import create_A_matrix_stub, create_B_matrix_stubs, read_A_matrix
from pymdp.distributions import Categorical, Dirichlet
import copy
from pandas import ExcelWriter


In [141]:
# Workflow flags passed to get_empty_A_excel / get_empty_B_excel further down:
# when a flag is False the helper writes a fresh, empty workbook into tmp_dir;
# when it is True the helper writes nothing and only returns the workbook path.
A_filled = True #set to True if your A matrix file is already filled out 
B_filled = True #set to True if your B matrix file is already filled out

The helper functions below are used in the simulation. In the future they will live elsewhere, in a utils file, but they are kept here for the sake of understanding.

In [169]:
def get_empty_A_excel(A_filled = False):
    """Return the path of the A-matrix workbook, writing an empty template if requested.

    When ``A_filled`` is falsy, the ``tmp_dir`` folder is created if it does not
    exist yet and the notebook-level ``A_stub`` object is written to the workbook
    through its ``to_excel`` method. When ``A_filled`` is truthy nothing is
    touched on disk.

    Parameters
    ----------
    A_filled : bool, default False
        Set to True when the workbook has already been filled out by hand.

    Returns
    -------
    str
        ``os.path.join('tmp_dir', 'my_a_matrix.xlsx')`` in both cases.
    """
    target_dir = 'tmp_dir'
    workbook_path = os.path.join(target_dir, 'my_a_matrix.xlsx')

    # Nothing to generate when the user already has a filled-in workbook.
    if A_filled:
        return workbook_path

    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    # A_stub is a notebook global; it must be defined before this is called.
    A_stub.to_excel(workbook_path)
    return workbook_path

def get_A(excel_path, model_labels_A):
    """Load the A-matrix workbook and convert it to array form.

    Parameters
    ----------
    excel_path : str
        Path handed to ``read_A_matrix``.
    model_labels_A : dict
        Label dictionary handed to ``utils.convert_A_stub_to_ndarray``
        (the notebook passes ``{"observations": ..., "states": ...}``).

    Returns
    -------
    Whatever ``utils.convert_A_stub_to_ndarray`` returns for the loaded stub.
    """
    filled_stub = read_A_matrix(excel_path)
    return utils.convert_A_stub_to_ndarray(filled_stub, model_labels_A)

def get_empty_B_excel(B_filled = False):
    """Return the path of the B-matrices workbook, writing empty templates if requested.

    When ``B_filled`` is falsy, the ``tmp_dir`` folder is created if it does not
    exist yet and every entry of the notebook-level ``B_stubs`` mapping is written
    to its own sheet of the workbook, with the sheet named after the mapping key.
    When ``B_filled`` is truthy nothing is touched on disk.

    Parameters
    ----------
    B_filled : bool, default False
        Set to True when the workbook has already been filled out by hand.

    Returns
    -------
    str
        ``os.path.join('tmp_dir', 'my_b_matrices (1).xlsx')`` in both cases.
    """
    xls_dir = 'tmp_dir'
    xls_fpath = os.path.join(xls_dir, 'my_b_matrices (1).xlsx')

    if not B_filled:
        if not os.path.exists(xls_dir):
            os.mkdir(xls_dir)

        # B_stubs is a notebook global; it must be defined before this is called.
        with ExcelWriter(xls_fpath) as writer:
            for factor_name, B_stub_f in B_stubs.items():
                # Pass the sheet name by keyword: positional arguments after the
                # writer are deprecated in recent pandas releases.
                B_stub_f.to_excel(writer, sheet_name='%s' % factor_name)

    return xls_fpath

def get_B(excel_path, model_labels_B):
    """Load the B-matrices workbook and convert it to array form.

    Parameters
    ----------
    excel_path : str
        Path handed to ``utils.read_B_matrices``.
    model_labels_B : dict
        Label dictionary handed to ``utils.convert_B_stubs_to_ndarray``
        (the notebook passes observations, states and actions).

    Returns
    -------
    Whatever ``utils.convert_B_stubs_to_ndarray`` returns for the loaded stubs.
    """
    filled_stubs = utils.read_B_matrices(excel_path)
    return utils.convert_B_stubs_to_ndarray(filled_stubs, model_labels_B)

def get_C(observations):
    """Build the prior-preference object ``C`` for the given observation labels.

    One zero-filled entry is requested from ``utils.obj_array_zeros`` per
    observation modality, sized by that modality's number of levels. Indices 0-5
    of the second entry (``C[1]``) are then set to fixed preference values; the
    first entry (``C[0]``) is left as returned.

    Parameters
    ----------
    observations : dict
        Maps each observation modality name to its list of level names. In this
        notebook the second modality is ``shortest_path``.

    Returns
    -------
    The object created by ``utils.obj_array_zeros`` with ``C[1][0:6]`` filled in.
    """
    modality_sizes = []
    for modality in observations:
        modality_sizes.append(len(observations[modality]))

    C = utils.obj_array_zeros(modality_sizes)

    # Preference values for levels 0..5 of the second modality, highest first.
    second_modality_prefs = (.5, 0.2, 0.1, 0.05, 0.04, 0.01)
    for level, preference in enumerate(second_modality_prefs):
        C[1][level] = preference

    # No preference is set over the first modality (C[0] is not modified here).
    return C



## The world (as represented by the agent's generative model)

### Hidden states

The agent represents its environment using latent variables - hidden factors

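The description below covers three binary hidden factors: `STATION`, `DISTANCE`, and `DESTINATION`. Note that the labels defined in the code cell that follows use a different layout: two hidden-state factors (`current_location` and `destination_location`) with eight levels each, while distance appears as the `shortest_path` observation modality.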

#### 1. `STATION`
The first factor is a binary variable representing whether the agent is on station 1 or 2. 

#### 2. `DISTANCE`

The second factor is a binary variable representing the distance left to the destination (0, or 5)

#### 3. `DESTINATION`

The third factor is a binary variable representing the station the agent is aiming for.


In [170]:
# Observation modalities -> level names.
observations = {
    "location": [f"station{i}" for i in range(1, 9)],
    "shortest_path": [f"distance{d}" for d in range(0, 30, 5)],
}

# Hidden-state factors -> level names.
states = {
    "current_location": [f"location{i}" for i in range(1, 9)],
    "destination_location": [f"destination{i}" for i in range(1, 9)],
}

# Control factors -> action names; the destination factor only has a "null" action.
actions = {
    "change_location": [f"got_to{i}" for i in range(1, 9)],
    "change_destination": ["null"],
}


In [171]:
model_labels_A = {
            "observations": observations,
            "states": states }

In [172]:
A_stub = create_A_matrix_stub(model_labels_A)

This function will create an Excel file containing your A matrix in the tmp_dir folder. After running the code, go into that file, and fill out the matrix with the desired 
probabilities. If your A matrix file is already filled out, just skip this next cell.

In [173]:
excel_path = get_empty_A_excel(A_filled)

If you get confused filling the A matrix, see the image below for how it should look. Once that's done, run the following cell to get the A matrix in the right form. 

<img src="tmp_dir/A_matrix.png">

In [174]:
A_filled = True 
A = get_A(excel_path, model_labels_A)

In [175]:
A_stub

Unnamed: 0_level_0,current_location,location1,location1,location1,location1,location1,location1,location1,location1,location2,location2,...,location7,location7,location8,location8,location8,location8,location8,location8,location8,location8
Unnamed: 0_level_1,destination_location,destination1,destination2,destination3,destination4,destination5,destination6,destination7,destination8,destination1,destination2,...,destination7,destination8,destination1,destination2,destination3,destination4,destination5,destination6,destination7,destination8
Modality,Level,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
location,station1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
location,station2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
location,station3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
location,station4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
location,station5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
location,station6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
location,station7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
location,station8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
shortest_path,distance0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
shortest_path,distance5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Now we have the A matrix!

In [176]:
A[0]

array([[[1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0

### Control state factors

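The 'control state' factors are the agent's representation of the control states (or actions) which it believes it has control over: `STATION CONTROL`, `DISTANCE_CONTROL`, and `DESTINATION`. Note that the `actions` dictionary defined earlier declares only two control factors, `change_location` and `change_destination`.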

#### 1. `STATION CONTROL`
This reflects the agent's ability to influence the `STATION_STATE` using policies or actions. It has only two actions: stay where it is, or move to the other station.

#### 2. `DISTANCE_CONTROL`
This is a control factor that reflects the agent's ability to move itself from distance 5 to distance 0, or vice versa.

#### 3. `DESTINATION`
This is a control factor that reflects the agent's ability to change its destination. It can either keep its destination, or change it.

In [177]:
# Labels needed to lay out the B (transition) stubs: observations, hidden states
# and the actions available for each control factor.
model_labels_B = {
            "observations": observations,
            "states": states,
            "actions": actions
          }

# One stub per hidden-state factor; get_empty_B_excel iterates this mapping with .items().
B_stubs = create_B_matrix_stubs(model_labels_B)

# Writes the empty workbook only when B_filled is False; always returns its path.
B_excel_path = get_empty_B_excel(B_filled)


In [178]:
# `B` is otherwise only assigned in a later cell, so load it here to keep this
# preview working after Restart & Run All. This requires the B workbook at
# B_excel_path to be filled out already.
B = get_B(B_excel_path, model_labels_B)
B[0][3]

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 1., 0., 1., 0., 1., 1., 1.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])

Go fill out the B matrices now - remember, there are two of them. Double check with the images below that you got it right, then run the next cell.

In [179]:
B_filled = True 
B = get_B(B_excel_path, model_labels_B)

In [180]:
B_stubs

{'current_location': current_location location1                                                  \
 change_location    got_to1 got_to2 got_to3 got_to4 got_to5 got_to6 got_to7   
 location1              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 location2              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 location3              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 location4              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 location5              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 location6              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 location7              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 location8              0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 
 current_location         location2          ... location7         location8  \
 change_location  got_to8   got_to1 got_to2  ...   got_to7 got_to8   got_to1   
 location1            0.0   

And now we have the B matrices

### Prior preferences

Now we parameterise the C vector, or the prior beliefs about observations. Given these preferences, the agent will reduce free energy, and try to optimize to get towards this preference state.

In [181]:
# Factor 0 is `current_location` / `change_location`: the only control factor in
# `actions` with more than one action (factor 1 only offers "null").
control_fac_idx = [0] # this is the controllable factor
# Prior preferences: get_C sets values on the second modality only (shortest_path).
C = get_C(observations)

In [182]:
C

array([array([0., 0., 0., 0., 0., 0., 0., 0.]),
       array([0.5 , 0.2 , 0.1 , 0.05, 0.04, 0.01])], dtype=object)

### Initialise an instance of the `Agent()` class:


In [183]:
# Build the agent from the generative model (A, B) and the prior preferences (C).
agent = Agent(
    A=A,
    B=B,
    C=C,
    control_fac_idx=control_fac_idx,
    inference_algo="MMP",
    policy_len=4,
    inference_horizon=1,
)

## Generative Process

In [184]:
# transition/observation matrices characterising the generative process
A_gp = copy.deepcopy(A)
B_gp = copy.deepcopy(B)

Initialise the simulation

In [185]:

T = 10  # number of timesteps in the simulation

# Location index of the true state at each timestep (coordinates for the scatter below).
station_list = []

# Initial observation indices, one per modality: [location, shortest_path].
observation = [1, 3]
# Initial true-state indices, one per factor: [current_location, destination_location].
state = [1, 6]

# Factor/modality names in definition order, hoisted out of the loop.
state_names = list(states)
observation_names = list(observations)
action_names = list(actions)

for t in range(T):

    print(f"\nTime {t}:")

    print(f"State: {[(name, states[name][state[f]]) for f, name in enumerate(state_names)]}")
    print(f"Observations: {[(name, observations[name][observation[g]]) for g, name in enumerate(observation_names)]}")
    print(observation)

    # Agent step: update beliefs from the observation, evaluate policies, pick an action.
    belief_state = agent.infer_states(observation)
    agent.infer_policies()
    action = agent.sample_action()
    station_list.append(state[0])

    # update environment: sample the next state of every factor from the generative
    # process, then sample a new observation for every modality from that state.
    for f, s in enumerate(state):
        state[f] = utils.sample(B_gp[f][:, s, int(action[f])])
    for g, _ in enumerate(observation):
        observation[g] = utils.sample(A_gp[g][:, state[0], state[1]])

    # Optional debugging output:
    # print(f"Beliefs: {[(list(states)[f], belief_state[f].values.round(3).T) for f in range(len(states))]}")

    # Iterate over the action factors themselves (the original used len(states), which
    # only worked because states and actions both have two entries).
    print(f"Action: {[(name, actions[name][int(action[a])]) for a, name in enumerate(action_names)]}")
 
 


Time 0:
State: [('current_location', 'location2'), ('destination_location', 'destination7')]
Observations: [('location', 'station2'), ('shortest_path', 'distance15')]
[1, 3]
Action: [('change_location', 'got_to3'), ('change_destination', 'null')]

Time 1:
State: [('current_location', 'location3'), ('destination_location', 'destination7')]
Observations: [('location', 'station3'), ('shortest_path', 'distance20')]
[2, 4]
Action: [('change_location', 'got_to7'), ('change_destination', 'null')]

Time 2:
State: [('current_location', 'location3'), ('destination_location', 'destination7')]
Observations: [('location', 'station3'), ('shortest_path', 'distance20')]
[2, 4]
Action: [('change_location', 'got_to6'), ('change_destination', 'null')]

Time 3:
State: [('current_location', 'location6'), ('destination_location', 'destination7')]
Observations: [('location', 'station6'), ('shortest_path', 'distance5')]
[5, 1]
Action: [('change_location', 'got_to7'), ('change_destination', 'null')]

Time 4:
