In [3]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np

tfd = tfp.distributions
tfb = tfp.bijectors

# Dummy data placeholders
T, K, P = 100, 3, 2  # Example values for number of observations, classes, and input variables
u = tf.random.normal([T, P])            # Input variables
y = tf.random.uniform([T], minval=-np.pi, maxval=np.pi)  # Observed outputs (angles in radians)

# Model definition
def model():
    # Priors
    a = yield tfd.Sample(tfd.Normal(0, 5), sample_shape=K, name="a")
    b = yield tfd.Sample(tfd.Normal(0, 5), sample_shape=[K, P], name="b")
    w = yield tfd.Sample(tfd.Normal(0, 5), sample_shape=[K, P], name="w")  # Changed here
    kappa = yield tfd.Sample(tfd.Gamma(2.0, 0.1), sample_shape=K, name="kappa")
    
    # Compute eta and class probabilities
    eta = tf.linalg.matmul(u, tf.transpose(b)) + a  # Shape: [T, K]
    class_probs = tf.nn.softmax(eta)                # Shape: [T, K]
    
    # Mean direction for each component
    mu = tf.linalg.matmul(u, tf.transpose(w))       # Shape: [T, K]  # Changed here
    
    # Expand kappa dimensions if necessary
    kappa_expanded = kappa[tf.newaxis, :]           # Shape: [1, K]
    
    # Likelihood for mixture of von Mises distributions
    y_obs = yield tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=class_probs),
        components_distribution=tfd.VonMises(loc=mu, concentration=kappa_expanded),
        name="y_obs"
    )

# Create the model's joint distribution
joint_distribution = tfd.JointDistributionCoroutineAutoBatched(model)

# Define the target log probability function for MCMC
def target_log_prob_fn(a, b, w, kappa):
    return joint_distribution.log_prob((a, b, w, kappa, y))

# Initialize the HMC transition kernel (or NUTS for automatic tuning)
num_burnin_steps = 500
num_results = 1000
kernel = tfp.mcmc.NoUTurnSampler(
    target_log_prob_fn=target_log_prob_fn,
    step_size=0.1
)

# Wrap kernel with a SimpleStepSizeAdaptation for tuning
kernel = tfp.mcmc.SimpleStepSizeAdaptation(
    kernel, num_adaptation_steps=int(0.8 * num_burnin_steps)
)

# Run MCMC to sample from the posterior
@tf.function
def run_mcmc():
    return tfp.mcmc.sample_chain(
        num_results=num_results,
        current_state=[tf.zeros(K), tf.zeros([K, P]), tf.zeros(P), tf.ones(K)],  # Initial states for [a, b, w, kappa]
        kernel=kernel,
        num_burnin_steps=num_burnin_steps,
        trace_fn=lambda current_state, kernel_results: kernel_results
    )

# Perform MCMC sampling
samples, kernel_results = run_mcmc()

# Posterior samples for each parameter
a_samples, b_samples, w_samples, kappa_samples = samples


ValueError: in user code:

    File "/tmp/ipykernel_429514/3824077815.py", line 66, in run_mcmc  *
        trace_fn=lambda current_state, kernel_results: kernel_results
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/mcmc/sample.py", line 330, in sample_chain  **
        previous_kernel_results = kernel.bootstrap_results(current_state)
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/mcmc/simple_step_size_adaptation.py", line 443, in bootstrap_results
        inner_results = self.inner_kernel.bootstrap_results(init_state)
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/mcmc/nuts.py", line 468, in bootstrap_results
        ] = leapfrog_impl.process_args(self.target_log_prob_fn, dummy_momentum,
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/mcmc/internal/leapfrog_integrator.py", line 378, in process_args
        [target, target_grad_parts] = mcmc_util.maybe_call_fn_and_grads(
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/mcmc/internal/util.py", line 297, in maybe_call_fn_and_grads
        result, grads = _value_and_gradients(fn, fn_arg_list, result, grads)
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/mcmc/internal/util.py", line 265, in _value_and_gradients
        return tfp_math_value_and_gradients(fn, fn_arg_list)
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/math/gradient.py", line 108, in value_and_gradient
        return _value_and_grad_impl(
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/math/gradient.py", line 378, in _value_and_grad_impl
        y, dydx, aux = grad_fn(lambda: f(*args, **kwargs) if _has_args(f) else f(),
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/math/gradient.py", line 330, in _gradient_old
        y, aux = f()
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/math/gradient.py", line 378, in <lambda>
        y, dydx, aux = grad_fn(lambda: f(*args, **kwargs) if _has_args(f) else f(),
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/math/gradient.py", line 375, in <lambda>
        f = lambda *args, **kwargs: (real_f(*args, **kwargs)  # pylint: disable=g-long-lambda
    File "/tmp/ipykernel_429514/3824077815.py", line 43, in target_log_prob_fn
        return joint_distribution.log_prob((a, b, w, kappa, y))
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/distributions/joint_distribution.py", line 899, in log_prob
        return self._call_log_prob(self._resolve_value(*args, **kwargs), name=name)
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/distributions/distribution.py", line 1269, in _call_log_prob
        return self._log_prob(value, **kwargs)
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/distributions/joint_distribution.py", line 677, in _log_prob
        self._map_measure_over_dists('log_prob', value))
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/distributions/joint_distribution.py", line 742, in _map_measure_over_dists
        return self._call_execute_model(
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/distributions/joint_distribution.py", line 850, in _call_execute_model
        return self._execute_model(
    File "/home/nuttidalab/miniconda3/envs/ssm/lib/python3.10/site-packages/tensorflow_probability/python/distributions/joint_distribution.py", line 1047, in _execute_model
        d = gen.send(next_value)
    File "/tmp/ipykernel_429514/3824077815.py", line 26, in model
        mu = tf.linalg.matmul(u, tf.transpose(w))       # Shape: [T, K]  # Changed here

    ValueError: Shape must be rank 2 but is rank 1 for '{{node mcmc_sample_chain/simple_step_size_adaptation___init__/_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutineAutoBatched_CONSTRUCTED_AT_top_level/log_prob/MatMul_1}} = MatMul[T=DT_FLOAT, grad_a=false, grad_b=false, transpose_a=false, transpose_b=false](mcmc_sample_chain/simple_step_size_adaptation___init__/_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/MatMul/a, mcmc_sample_chain/simple_step_size_adaptation___init__/_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutineAutoBatched_CONSTRUCTED_AT_top_level/log_prob/transpose_1)' with input shapes: [100,2], [2].


In [4]:
import tensorflow as tf
import tensorflow_probability as tfp

# Dummy data placeholders
T, K, P = 100, 3, 2  # example values for number of observations, classes, and input variables
u = tf.random.normal([T, P])
y = tf.random.uniform([T], minval=-tf.constant(np.pi), maxval=tf.constant(np.pi))

tfd = tfp.distributions

class VonMisesMixtureModel(tf.Module):
    def __init__(self, T, K, P):
        self.T = T
        self.K = K
        self.P = P

        # Parameters
        self.a = tf.Variable(tf.zeros([K]), name="a")
        self.b = tf.Variable(tf.zeros([K, P]), name="b")
        self.w = tf.Variable(tf.zeros([P]), name="w")
        self.kappa = tf.Variable(tf.ones([K]), name="kappa")

    def __call__(self, u):
        eta = tf.linalg.matmul(u, self.b, transpose_b=True) + self.a  # Linear predictors
        class_probs = tf.nn.softmax(eta)  # Softmax for class probabilities
        
        mu = tf.linalg.matmul(u, tf.expand_dims(self.w, -1))[:, 0]  # Mean direction
        components = [tfd.VonMises(mu=mu, concentration=k) for k in self.kappa]
        
        mixture_dist = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(probs=class_probs),
            components_distribution=tfd.VonMises(
                loc=mu, concentration=self.kappa  # von Mises components
            )
        )
        return mixture_dist.log_prob(y)

# Instantiate model and call on data
model = VonMisesMixtureModel(T, K, P)
log_likelihood = model(u)  # Log-likelihood for given data

# Define optimizer and minimize negative log-likelihood
optimizer = tf.optimizers.Adam()
for step in range(1000):  # Training loop
    with tf.GradientTape() as tape:
        loss = -tf.reduce_mean(log_likelihood)  # Negative log-likelihood
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))


TypeError: missing a required argument: 'loc'