In [1]:
import numpy as np
from pymdp.envs import TMazeEnv
from pymdp import utils, maths
import copy

In [2]:
# Re-import modified modules automatically before each cell executes, so edits
# to library source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

*actions*: 0: CENTER, 1: RIGHT ARM, 2: LEFT ARM, 3: CUE LOCATION (bottom)

*observations*: 0: LOCATION, 1: REWARD, 2: CUE

*states*: 0: LOCATION, 1: CONTEXT

In [4]:
# Build the T-maze environment and pull its generative-model arrays.
# NOTE(review): reward_probs is presumably [p(reward), p(no reward)] for the
# rewarding arm -- confirm against the TMazeEnv documentation.
env = TMazeEnv(reward_probs=[0.98, 0.02])
A = env.get_likelihood_dist()  # likelihood distribution as provided by the environment
B = env.get_transition_dist()  # transition distribution as provided by the environment

In [6]:
# Prior beliefs over the hidden-state factors: p(location), p(context)
D = [np.array([1,0,0,0]),     # Location: all prior mass on index 0, i.e. it knows it is in the center
     np.array([0.5, 0.5])]    # Context ('reward condition'): uniform, i.e. unknown
# Preferences over the observation modalities: \tilde p(location), \tilde p(reward), \tilde p(cue)
C = [np.array([0., 0., 0., 0.]), # Location: no preference over where it is
     np.array([ 0., 3., -3.]),   # Reward: prefers index 1 (+3) and avoids index 2 (-3); presumably reward vs. punishment -- verify against TMazeEnv
     np.array([0., 0.])]         # Cue: no preference between the two cue values (right, left)

In [7]:
# Reset the environment and keep the initial observation it returns.
obs = env.reset()
# Display it; the entries are ordered as (location, reward, cue).
obs

[0, 0, 1]

In [67]:
def softmax(dist):
    """
    Compute a numerically stable softmax along the first axis.

    Parameters
    ----------
    dist : array_like
        Unnormalised values (e.g. log-probabilities). A 1-D input is
        normalised as a whole; for a 2-D input every column (axis 0) is
        normalised independently. Lists and tuples are accepted and
        converted to arrays.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``dist`` whose entries are
        non-negative and sum to 1 along axis 0.
    """
    # Coerce plain sequences to arrays; existing ndarrays pass through as-is.
    dist = np.asanyarray(dist)

    # Shifting by the per-column maximum does not change the result but keeps
    # np.exp from overflowing on large inputs.
    shifted = dist - dist.max(axis=0)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=0)

In [8]:
# Print the current observation (location, reward, cue).
print(obs)

[0, 0, 1]


In [16]:
# Small constant added before every log so that log(0) is never evaluated.
EPS = 1e-16

s_location_idx = 0  # position of the location factor in D
s_context_idx = 1   # position of the context factor in D
qs_context = D[s_context_idx] # Equals D for the first iteration
# NOTE(review): assumes B holds one entry per hidden-state factor, so that
# len(B) matches len(D) -- confirm against env.get_transition_dist().
n_state = len(B)

# Hand-written one-hot observations, one vector per modality. This rebinds
# `obs`, replacing the index-valued observation returned by env.reset().
obs = [np.array([1., 0., 0., 0.]), np.array([1., 0., 0.]), np.array([1., 0.])]

# Alias of D (not a copy): the previous beliefs start out as the prior.
old_qs = D

for i in range(n_state):

    # Log of the previous belief for factor i.
    prior = np.log(old_qs[i] + EPS)
    print("prior", prior)

    # Start the posterior for this factor from a uniform distribution of the
    # same length as the prior.
    qs = np.ones(len(prior)) / len(prior)
    print("qs", qs)

    # Negative entropy of qs, sum(q * log q). Uses the shared EPS constant
    # rather than a second hard-coded 1e-16.
    negH_qs = qs.dot(np.log(qs + EPS))
    print(negH_qs)


prior [  0.         -36.84136149 -36.84136149 -36.84136149]
qs [0.25 0.25 0.25 0.25]
[-1.38629436]
prior [-0.69314718 -0.69314718]
qs [0.5 0.5]
[-0.69314718]
