In [24]:
import numpy as np
from scipy.stats import norm
from scipy.special import softmax
from model.helpers import square_exponential_kernel, normalize_last_column

# Fix the global NumPy seed so the force samples drawn below are reproducible.
np.random.seed(123)

# Grid resolutions: number of points along each axis.
n_timestep = 10
n_velocity = 20
n_action = 2
n_position = 50
# Lower/upper bounds of each grid (units are not stated in this notebook).
min_position, max_position = 0.0, 4.0
min_velocity, max_velocity = -2.0, 4.0
min_timestep, max_timestep = 0.0, 1.0

# Evenly spaced grids; np.linspace includes both end points.
timestep = np.linspace(min_timestep, max_timestep, n_timestep)

velocity = np.linspace(min_velocity, max_velocity, n_velocity)
# Integer action codes 0 .. n_action-1.
action = np.arange(n_action)
position = np.linspace(min_position, max_position,n_position)

# Fraction of the current velocity that is removed; it is applied as
# `velocity - friction_factor*velocity` in build_transition_velocity_tavv.
friction_factor = 0.5

# Sampled "own force" trajectories: 300 draws from a multivariate normal whose
# mean is a cosine of the timestep grid. Resulting shape is (300, n_timestep).
# NOTE(review): square_exponential_kernel comes from model.helpers; it presumably
# returns an (n_timestep, n_timestep) covariance matrix and the meaning of the
# 0.05 and 0.1 arguments should be confirmed there.
mu = 0.5 + 0.5*np.cos(6*(timestep + 5))
sigma = square_exponential_kernel(timestep, 0.05,  0.1)
own_force = np.random.multivariate_normal(mu, sigma, size=300)

# Sampled "push effect" trajectories, drawn the same way with a different mean.
# `mu` and `sigma` are rebound here, so after this cell they hold the
# push-effect values, not the own-force ones.
mu = 0.4 + 2*np.cos(3 * (timestep - 2))
sigma = square_exponential_kernel(timestep, 0.05,  0.1)
push_effect = np.random.multivariate_normal(mu, sigma, size=300)

# Not used by any cell visible here.
# NOTE(review): presumably the standard deviation of position-transition noise — confirm.
sigma_transition_position = 0.05

# Compute preferences ------------------------------------------------------------------------------------

# Log of a softmax over the position indices 0 .. n_position-1, so the
# log-prior increases with the position index.
log_prior = np.log(softmax(np.arange(n_position)))

# Compute velocity transitions --------------------------------------------------------------------------


def build_transition_velocity_tavv():
    """Estimate velocity-transition frequencies from the sampled forces.

    For every (timestep, action, current velocity) triple, the candidate next
    velocity is computed for each force sample as

        velocity - friction_factor*velocity + action_effect + own_force

    and the samples are histogrammed over the velocity grid.

    Reads the notebook globals ``n_timestep``, ``n_action``, ``n_velocity``,
    ``velocity``, ``friction_factor``, ``push_effect`` and ``own_force``.
    ``push_effect`` and ``own_force`` are expected to be 2-D arrays of shape
    (n_sample, n_timestep), as produced by the sampling code above.

    Returns
    -------
    The result of ``normalize_last_column`` applied to the count array of
    shape (n_timestep, n_action, n_velocity, n_velocity), whose axes are
    (timestep, action, current velocity, next-velocity bin).
    NOTE(review): ``normalize_last_column`` is defined in model.helpers;
    presumably it rescales along the last axis so each row sums to 1 — confirm.
    """
    tr = np.zeros((n_timestep, n_action, n_velocity, n_velocity))

    # Histogram edges: every grid velocity is a left edge, plus one extra right
    # edge located one grid step beyond the last velocity.
    bins = list(velocity) + [velocity[-1] + (velocity[-1] - velocity[-2])]

    # All terms are broadcast to (n_action, n_sample, n_timestep, n_velocity)
    # instead of being materialised with np.tile/np.moveaxis.
    after_friction = velocity - friction_factor*velocity  # Shape=(n_velocity,), broadcasts on the last axis

    # Shape=(n_action, n_sample, n_timestep, 1); np.tile returns a fresh array,
    # so zeroing a slice below does not modify push_effect.
    action_effect = np.tile(push_effect[np.newaxis, :, :, np.newaxis], (n_action, 1, 1, 1))
    action_effect[0] = 0  # Taking action 0 has no effect, taking any other action is pushing

    own_force_ext = own_force[np.newaxis, :, :, np.newaxis]  # Shape=(1, n_sample, n_timestep, 1)

    # Same left-to-right summation order as the tiled version, so results are bit-identical.
    new_v = after_friction + action_effect + own_force_ext
    # Out-of-range velocities are counted in the first/last bin rather than
    # dropped (np.histogram's last bin is closed on the right).
    new_v = np.clip(new_v, bins[0], bins[-1])

    for v_idx in range(n_velocity):
        for a_idx in range(n_action):
            for t_idx in range(n_timestep):
                tr[t_idx, a_idx, v_idx, :], _ = np.histogram(new_v[a_idx, :, t_idx, v_idx], bins=bins)
    return normalize_last_column(tr)


# Reference transition array; the cell that rebuilds the transitions at top
# level compares its result against this value.
transition_velocity_tavv_REF = build_transition_velocity_tavv()

In [97]:
# Rebuild the velocity transitions at top level using broadcasting, then check
# that the result is identical to transition_velocity_tavv_REF.
tr = np.zeros((n_timestep, n_action, n_velocity, n_velocity))

n_sample = push_effect.shape[0]
# Histogram edges: grid velocities plus one extra right edge one grid step further.
bins = list(velocity) + [velocity[-1] + (velocity[-1] - velocity[-2])]

after_friction = velocity - friction_factor*velocity  # Shape=(n_velocity,)
after_friction = np.expand_dims(after_friction, (0, 1, 2))  # Shape=(1, 1, 1, n_velocity)

push__ext = np.expand_dims(push_effect, (0, -1))  # Shape=(1, n_sample, n_timestep, 1)
# Row 0 = action 0 (no push), row 1 = action 1 (push); this stacking yields
# exactly two actions, which matches n_action = 2.
action_effect = np.vstack((np.zeros_like(push__ext), push__ext))  # Shape=(2, n_sample, n_timestep, 1)

own_force__ext = np.expand_dims(own_force, (0,  -1))  # Shape=(1, n_sample, n_timestep, 1)

# Broadcasts to (2, n_sample, n_timestep, n_velocity).
new_v = after_friction + action_effect + own_force__ext
# Out-of-range velocities end up in the first/last histogram bin.
new_v = np.clip(new_v, bins[0], bins[-1])

for v_idx, v in enumerate(velocity):
    for a_idx, a in enumerate(action):
        for t_idx, t in enumerate(timestep):
            # Index the action axis by position (a_idx), not by the action value.
            tr[t_idx, a_idx, v_idx, :], _ = np.histogram(new_v[a_idx, :, t_idx, v_idx], bins=bins)
tr = normalize_last_column(tr)

# True only if shapes match and every element is equal.
np.array_equal(tr, transition_velocity_tavv_REF)

(1, 1, 1, 20)
(1, 300, 10, 1)
(2, 300, 10, 20)


True

In [31]:
# Shape of the velocity-transition array:
# (timestep, action, current velocity, next-velocity bin).
(n_timestep, n_action, n_velocity, n_velocity)

(10, 2, 20, 20)

In [91]:
# Repeat the sampled push along a new leading action axis; the trailing
# length-1 axis is kept so the array can broadcast over velocity.
action_effect = np.tile(
    push_effect[..., np.newaxis],  # Shape=(n_sample, n_timestep, 1)
    (n_action, 1, 1, 1),
)                                  # Shape=(n_action, n_sample, n_timestep, 1)
action_effect[0, ...] = 0  # Action 0 applies no push; every other action pushes
# Independent copy kept as the reference to compare other constructions against.
action_effect_REF = action_effect.copy()

In [96]:
# Add a leading action axis and a trailing velocity axis to the push samples.
ex_push = push_effect[np.newaxis, ..., np.newaxis]  # Shape=(1, n_sample, n_timestep, 1)
# np.vstack takes ONE sequence of arrays; the zero block is the "no push" action.
action_effect = np.vstack([np.zeros_like(ex_push), ex_push])

TypeError: vstack() takes 1 positional argument but 2 were given

In [94]:
# Inspect the current action_effect array. The full repr is long;
# `action_effect.shape` is a lighter check when only the layout matters.
action_effect

array([[[[ 2.35703553],
         [ 1.98193337],
         [ 1.5601852 ],
         ...,
         [-1.40661336],
         [-1.59026652],
         [-1.53794062]],

        [[ 2.37714356],
         [ 2.04181282],
         [ 1.5841272 ],
         ...,
         [-1.46540313],
         [-1.59363559],
         [-1.57530219]],

        [[ 2.26525167],
         [ 1.99484961],
         [ 1.5910496 ],
         ...,
         [-1.28683901],
         [-1.52019511],
         [-1.56496372]],

        ...,

        [[ 2.32653771],
         [ 2.06949327],
         [ 1.65530835],
         ...,
         [-1.360897  ],
         [-1.56134017],
         [-1.59369984]],

        [[ 2.34453817],
         [ 1.994362  ],
         [ 1.51033785],
         ...,
         [-1.33656291],
         [-1.44537049],
         [-1.41867188]],

        [[ 2.2783719 ],
         [ 1.95926497],
         [ 1.49790525],
         ...,
         [-1.3927156 ],
         [-1.61188172],
         [-1.5279807 ]]],


       [[[ 0.        ],


(1, 300, 10)


(2, 300, 10)

In [69]:
 # Append a trailing length-1 axis to push_effect:
 # (n_sample, n_timestep) -> (n_sample, n_timestep, 1).
 np.expand_dims(push_effect, -1)

(300, 10, 1)