In [78]:
import time

import numpy as np
import pandas as pd
import scipy.special as sp
import scipy.stats as sts
import tensorflow as tf
import tensorflow_probability as tfp

In [79]:
tfd = tfp.distributions

In [80]:
# Confirm GPU in use
tf.config.list_physical_devices('GPU')

[]

In [81]:
# Parameters
P = [3, 5, 1, 4]
N = 10 ** 2
SEED = 1729
K = len(P)

In [82]:
# Ground truth
np.random.seed(SEED)

true_transmission_rate_mu = np.random.beta(2, 10, len(P))
true_transmission_rate_std = np.sqrt(sts.invgamma.rvs(a=100, size=len(P)))
true_transmission_rate = np.concatenate([
    sp.expit(sp.logit(mu) + np.random.normal(0, std, p))
    for p, mu, std
    in zip(P, true_transmission_rate_mu, true_transmission_rate_std)
])

true_occurrence_rate_mu = np.random.beta(2, 10, len(P))
true_occurrence_rate_std = np.sqrt(sts.invgamma.rvs(a=50, size=len(P)))
true_occurrence_rate = np.concatenate([
    sp.expit(sp.logit(mu) + np.random.normal(0, std, p))
    for p, mu, std
    in zip(P, true_occurrence_rate_mu, true_occurrence_rate_std)
])

base_rate = np.random.beta(2, 10, 1)

In [83]:
# Simulate data
data = {}
# TODO: Add covariances between same type of activity
for p in range(sum(P)):
    occurrence = np.random.binomial(1, true_occurrence_rate[p], N)
    transmission = occurrence * np.random.binomial(1, true_transmission_rate[p], N)
    data[f'O{p+1}'] = occurrence
    data[f'T{p+1}'] = transmission

data['T0'] = np.random.binomial(1, base_rate, N)
X = pd.DataFrame(data)
z = X.loc[:, X.columns.str.startswith('T')].sum(axis=1)
y = (z > 0).astype(int)
X = X.loc[:, X.columns.str.startswith('O')]
c = np.array([i + 1 for i, p in enumerate(P) for __ in range(p)])

In [84]:
# Convert to tensors
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.float32)
c = tf.convert_to_tensor(c, dtype=tf.int32)
# Move to GPU
X = X + tf.fill(X.shape, 0.0)
y = y + tf.fill(y.shape, 0.0)
c = c + tf.fill(c.shape, 0)

In [85]:
c

<tf.Tensor: shape=(13,), dtype=int32, numpy=array([1, 1, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4, 4], dtype=int32)>

In [86]:
c-1

<tf.Tensor: shape=(13,), dtype=int32, numpy=array([0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3], dtype=int32)>

In [87]:
tf.gather(tf.random.uniform((4,)), c-1)

<tf.Tensor: shape=(13,), dtype=float32, numpy=
array([0.9714577 , 0.9714577 , 0.9714577 , 0.42643607, 0.42643607,
       0.42643607, 0.42643607, 0.42643607, 0.9886533 , 0.03208542,
       0.03208542, 0.03208542, 0.03208542], dtype=float32)>

In [96]:
# Define log-likelihood
@tf.function
def censored_poisbinom_loglike(theta, mu, sigma2, rho):
    target = 0
    # Pre-computation
    log1m_theta = tf.math.log(1-theta)
    logit_mu = tf.math.sigmoid(mu)
    # Support
    if tf.math.reduce_any(tf.math.logical_or(theta <= 0., theta >= 1.)):
        return -np.inf
    if tf.math.logical_or(rho <= 0., rho >= 1.):
        return -np.inf
    if tf.math.reduce_any(tf.math.logical_or(mu <= 0., mu >= 1.)):
        return -np.inf
    if tf.math.reduce_any(sigma2 <= 0.):
        return -np.inf
    # Priors
    target += tf.reduce_sum((1 - mu) ** 4)  # beta
    target += (1 - rho) ** 2  # beta
    target += tf.reduce_sum(-11 * tf.math.log(sigma2) - 1 / sigma2)  # inverse gamma
    # Likelihood (classes)
    target += tf.math.reduce_sum(tf.math.log(1 / theta + 1 / (1 - theta)) -
                                 (tf.math.sigmoid(theta) - tf.gather(logit_mu, c-1)) ** 2 / (2 * tf.gather(sigma2, c-1)))
    # Likelihood (observations)
    s = tf.einsum('ij,j->i', X, log1m_theta) + tf.math.log(1-rho)
    ll = tf.math.reduce_sum(tf.where(y == 1, tfp.math.log1mexp(s), s))
    return ll

In [97]:
# Define negative log-likelihood and use AD to compute gradients
@tf.function
def censored_poisbinom_negloglike(params):
    theta, mu, sigma2, rho = tf.split(params, [sum(P), K, K, 1], axis=0)
    # need to take these back down to vectors and scalars:
    theta = tf.reshape(theta,(sum(P),))
    mu = tf.reshape(mu,(K,))
    sigma2 = tf.reshape(sigma2,(K,))
    rho = tf.reshape(rho,())
    return -1 * censored_poisbinom_loglike(theta, mu, sigma2, rho)

@tf.function
def censored_poisbinom_negloglike_and_grad(params):
    return tfp.math.value_and_gradient(
        censored_poisbinom_negloglike, 
        params
    )

In [98]:
# Approximate MLE using gradient descent
start = tf.fill(sum(P) + 2 * K + 1, 0.5)

optim_results = tfp.optimizer.bfgs_minimize(
    censored_poisbinom_negloglike_and_grad, start, tolerance=1e-8
)

est_params = optim_results.position.numpy()
est_serr = np.sqrt(np.diagonal(optim_results.inverse_hessian_estimate.numpy()))

In [99]:
# Set model parameters
nuts_samples = 1000
nuts_burnin = 200
init_step_size=.3
init = [est_params[:sum(P)], est_params[sum(P):sum(P)+K],
        est_params[sum(P)+K:sum(P)+2*K], est_params[sum(P)+2*K]]

In [101]:
# Fit model
@tf.function
def nuts_sampler(init):
    nuts_kernel = tfp.mcmc.NoUTurnSampler(
        target_log_prob_fn=censored_poisbinom_loglike, 
        step_size=init_step_size,
    )
    adapt_nuts_kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
        inner_kernel=nuts_kernel,
        num_adaptation_steps=nuts_burnin,
        step_size_getter_fn=lambda pkr: pkr.step_size,
        log_accept_prob_getter_fn=lambda pkr: pkr.log_accept_ratio,
        step_size_setter_fn=lambda pkr, new_step_size: pkr._replace(step_size=new_step_size)
    )

    samples = tfp.mcmc.sample_chain(
        num_results=nuts_samples,
        current_state=init,
        kernel=adapt_nuts_kernel,
        num_burnin_steps=nuts_burnin,
        parallel_iterations=10,
        trace_fn=None
    )
    return samples

start = time.time()
samples = nuts_sampler(init)
print(f"{time.time() - start:.02f} seconds elapsed")

ValueError: in user code:

    <ipython-input-77-cffa94a0959c>:16 nuts_sampler  *
        samples = tfp.mcmc.sample_chain(
    /home/tim/miniconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow_probability/python/mcmc/sample.py:332 sample_chain  **
        previous_kernel_results = kernel.bootstrap_results(current_state)
    /home/tim/miniconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow_probability/python/mcmc/dual_averaging_step_size_adaptation.py:528 bootstrap_results
        inner_results = self.inner_kernel.bootstrap_results(init_state)
    /home/tim/miniconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow_probability/python/mcmc/nuts.py:474 bootstrap_results
        ] = leapfrog_impl.process_args(self.target_log_prob_fn, dummy_momentum,
    /home/tim/miniconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow_probability/python/mcmc/internal/leapfrog_integrator.py:385 process_args
        [target, target_grad_parts] = mcmc_util.maybe_call_fn_and_grads(
    /home/tim/miniconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow_probability/python/mcmc/internal/util.py:309 maybe_call_fn_and_grads
        raise ValueError('Encountered `None` gradient.\n'

    ValueError: Encountered `None` gradient.
      fn_arg_list: [<tf.Tensor 'init:0' shape=(13,) dtype=float32>, <tf.Tensor 'init_1:0' shape=(4,) dtype=float32>, <tf.Tensor 'init_2:0' shape=(4,) dtype=float32>, <tf.Tensor 'init_3:0' shape=() dtype=float32>]
      grads: [<tf.Tensor 'mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/gradients/mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/PartitionedCall_grad/PartitionedCall:0' shape=(13,) dtype=float32>, None, None, <tf.Tensor 'mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/gradients/mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/PartitionedCall_grad/PartitionedCall:1' shape=() dtype=float32>]
