In [1]:
import time

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp

In [2]:
# Shorthand alias for the TensorFlow Probability distributions module.
# NOTE(review): `tfd` is not referenced in the cells visible here.
tfd = tfp.distributions

In [3]:
# Confirm GPU in use: display the GPU devices TensorFlow has discovered.
# The cell's output is the returned list, one entry per visible GPU.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# Parameters
P = 10         # number of occurrence/transmission column pairs that are simulated
N = 1_000_000  # number of simulated rows (Bernoulli draws per column)
SEED = 1729    # seed handed to NumPy's global random number generator

In [5]:
# Ground truth
# Seed NumPy's global RNG once, then draw every true rate from Beta(2, 10).
# The three draws stay in this order so the seeded stream is consumed the
# same way on every run.
np.random.seed(SEED)
true_transmission_rate = np.random.beta(a=2, b=10, size=P)
true_occurrence_rate = np.random.beta(a=2, b=10, size=P)
# Length-1 array (not a scalar) so it can be concatenated with the P rates later.
base_rate = np.random.beta(a=2, b=10, size=1)

In [6]:
# Simulate data
# For each of the P sources draw an occurrence indicator (column O<k>) and a
# transmission indicator (column T<k>) that can only be 1 when the matching
# occurrence is 1. Draw order is fixed (occurrence, then transmission, per
# source; baseline last) so the seeded RNG stream is consumed identically.
data = {}
rate_pairs = zip(true_occurrence_rate, true_transmission_rate)
for k, (occ_rate, trans_rate) in enumerate(rate_pairs, start=1):
    occurred = np.random.binomial(1, occ_rate, N)
    transmitted = occurred * np.random.binomial(1, trans_rate, N)
    data[f'O{k}'] = occurred
    data[f'T{k}'] = transmitted
# T0: baseline transmission drawn independently of every occurrence column.
data['T0'] = np.random.binomial(1, base_rate, N)
simulated = pd.DataFrame(data)

is_transmission = simulated.columns.str.startswith('T')
is_occurrence = simulated.columns.str.startswith('O')
# z: per-row count of transmissions over all T columns (T0 included).
z = simulated.loc[:, is_transmission].sum(axis=1)
# X: only the occurrence indicators are kept as model inputs.
X = simulated.loc[:, is_occurrence]
# y: 1 when at least one transmission happened in the row, otherwise 0.
y = (z > 0).astype(int)

In [7]:
# Convert to tensors
# Both the occurrence matrix X and the outcome vector y are cast to float32.
# Note that this rebinds X and y from pandas objects to tensors.
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.float32)
# Move to GPU
# NOTE(review): adding an all-zero tensor of the same shape leaves the values
# unchanged; presumably the intent is that the result of an op is placed on
# the default (GPU) device -- confirm with `X.device` / `y.device`.
X = X + tf.fill(X.shape, 0.0)
y = y + tf.fill(y.shape, 0.0)

In [8]:
# Sanity check of the einsum contraction used by the log-likelihood:
# 'ij,j->i' multiplies each row of the matrix element-wise with the vector and
# sums over j, i.e. a matrix-vector product.
theta = tf.constant([0.3, 0.4, 0.5])
X_test = tf.constant([
    [1., 0., 1.],
    [0., 0., 0.],
    [0., 0., 1.]
])
# Expected per-row result: [0.3 + 0.5, 0.0, 0.5].
tf.einsum('ij,j->i', X_test, theta)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.8, 0. , 0.5], dtype=float32)>

In [9]:
# Define log-likelihood
@tf.function
def censored_poisbinom_loglike(theta, rho):
    """Log-likelihood of the binary outcome `y` given the occurrence matrix `X`.

    Reads the module-level tensors `X` and `y`. For row i the log-probability
    of observing no transmission at all is

        s_i = sum_j X[i, j] * log(1 - theta[j]) + log(1 - rho)

    and the row contributes `s_i` when `y[i] != 1` and `log(1 - exp(s_i))`
    when `y[i] == 1`.

    Args:
        theta: vector of per-column rates, contracted against the columns of `X`.
        rho: scalar baseline rate.

    Returns:
        The summed log-likelihood, or `-inf` when any element of `theta`, or
        `rho`, lies outside the open interval (0, 1).
    """
    # Outside the open unit interval the likelihood is treated as zero.
    if tf.math.reduce_any(tf.math.logical_or(theta <= 0., theta >= 1.)):
        return -np.inf
    if tf.math.logical_or(rho <= 0., rho >= 1.):
        return -np.inf
    # log1p(-x) instead of log(1 - x): in float32, 1 - x rounds to exactly 1
    # for tiny x, which would turn the term into exactly 0 and could make
    # s == 0, where log(1 - exp(s)) is -inf.
    log1m_theta = tf.math.log1p(-theta)
    s = tf.einsum('ij,j->i', X, log1m_theta) + tf.math.log1p(-rho)
    # log1mexp(s) evaluates log(1 - exp(-|s|)); s is never positive here, so
    # this is log(1 - exp(s)).
    ll = tf.math.reduce_sum(tf.where(y == 1, tfp.math.log1mexp(s), s))
    return ll

In [10]:
# Define negative log-likelihood and use AD to compute gradients
@tf.function
def censored_poisbinom_negloglike(params):
    """Negative log-likelihood as a function of one stacked parameter vector.

    Args:
        params: tensor of length P + 1; the first P entries are the theta
            values and the last entry is rho.

    Returns:
        The negated value of `censored_poisbinom_loglike(theta, rho)`.
    """
    theta_part, rho_part = tf.split(params, [P, 1], axis=0)
    # The log-likelihood expects a length-P vector and a scalar, so both
    # pieces of the split are reshaped explicitly.
    theta_vec = tf.reshape(theta_part, (P,))
    rho_scalar = tf.reshape(rho_part, ())
    return -censored_poisbinom_loglike(theta_vec, rho_scalar)

@tf.function
def censored_poisbinom_negloglike_and_grad(params):
    """Return the negative log-likelihood at `params` and its gradient.

    Args:
        params: stacked parameter tensor accepted by
            `censored_poisbinom_negloglike`.

    Returns:
        A `(value, gradient)` pair computed by automatic differentiation.
    """
    value, gradient = tfp.math.value_and_gradient(
        censored_poisbinom_negloglike, params
    )
    return value, gradient

In [11]:
# Test AD
# Evaluate the objective and its gradient with every parameter set to 0.5.
test_val = tf.fill(P + 1, 0.5)
out = censored_poisbinom_negloglike_and_grad(test_val)
neg_ll_value, neg_ll_grad = out
print("Function value: ", neg_ll_value.numpy())
print("Gradients: ", neg_ll_grad.numpy())

Function value:  1095003.5
Gradients:  [  58669.26   155057.3     65305.16   390140.8    193293.45   206793.22
   90571.164  136026.36    86457.08   125233.49  1040800.25 ]


In [12]:
# Approximate the MLE with BFGS (a quasi-Newton method, not plain gradient
# descent), driven by the autodiff value-and-gradient function.
# Every parameter starts at 0.5; tf.fill is given an explicit 1-D shape.
start = tf.fill([P + 1], 0.5)

optim_results = tfp.optimizer.bfgs_minimize(
    censored_poisbinom_negloglike_and_grad, start, tolerance=1e-8
)

# Report the optimizer's own diagnostics instead of trusting the result
# blindly: the objective is -inf-bounded outside (0, 1), so the line search
# can stall, and an unconverged run yields meaningless estimates and errors.
mle_converged = bool(optim_results.converged.numpy())
mle_failed = bool(optim_results.failed.numpy())
mle_iterations = int(optim_results.num_iterations.numpy())
print(
    f"BFGS converged: {mle_converged}, failed: {mle_failed}, "
    f"iterations: {mle_iterations}"
)
if mle_failed or not mle_converged:
    print(
        "WARNING: BFGS did not converge; the estimates and standard errors "
        "below are not reliable."
    )

est_params = optim_results.position.numpy()
# Standard errors are approximated by the square root of the diagonal of
# BFGS's running inverse-Hessian estimate of the negative log-likelihood.
est_serr = np.sqrt(np.diagonal(optim_results.inverse_hessian_estimate.numpy()))
display(pd.DataFrame(
    np.c_[est_params, est_serr, np.concatenate([true_transmission_rate, base_rate])],
    columns=['estimate', 'std err', 'true_val'],
    index=[f'theta_{i}' for i in range(1, P + 1)] + ['rho']
))

Unnamed: 0,estimate,std err,true_val
theta_1,0.1930581,0.993102,0.11229
theta_2,0.08910096,0.908628,0.21544
theta_3,0.1749475,0.987107,0.134004
theta_4,0.2721431,0.399606,0.03496
theta_5,0.03052485,0.871792,0.14024
theta_6,0.3784133,0.924917,0.389962
theta_7,0.07630392,0.964148,0.061519
theta_8,0.01006017,0.907173,0.096669
theta_9,0.09565976,0.96794,0.08617
theta_10,1.776357e-15,0.919623,0.052647


In [13]:
# Sampler settings
nuts_samples = 5_000   # number of draws requested from the sampler
nuts_burnin = 2_000    # burn-in steps; also used as the number of step-size adaptation steps
chains = 4             # requested chain count (only printed; see the tiling cell)
init_step_size = 0.3   # initial NUTS step size, adapted during burn-in
# Start the sampler at the optimizer's estimates: [theta vector, rho scalar].
init = [est_params[:P], est_params[-1]]

In [14]:
def tile_init(init, num_repeats):
    """
    create tiled initial values for multiple chains
    idea from pymc4.inference.sample:
    https://github.com/pymc-devs/pymc4/blob/master/pymc4/inference/sampling.py

    Args:
        init: sequence of initial values, one per state part. Each entry may
            be an array, a NumPy scalar, a Python scalar or a nested list.
        num_repeats: number of chains, i.e. the size of the new leading axis.

    Returns:
        A list with one array per entry of `init`; each array has shape
        `(num_repeats,) + entry.shape` and holds `num_repeats` copies of the
        entry stacked along axis 0.
    """
    tiled = []
    for tens in init:
        # Coerce first: plain Python scalars and lists have no `.ndim`.
        arr = np.asarray(tens)
        # Repeat only along the new leading axis; keep every other axis as is.
        reps = (num_repeats,) + (1,) * arr.ndim
        tiled.append(np.tile(arr[np.newaxis, ...], reps))
    return tiled

# Tiling is currently disabled, so `init` keeps its un-tiled form and the
# `chains` value printed below is not applied to it.
#init = tile_init(init, chains)
print("Multiple chains requested: ", chains)
# Display the initial state that will be handed to the sampler.
init

Multiple chains requested:  4


[array([1.9305813e-01, 8.9100957e-02, 1.7494754e-01, 2.7214310e-01,
        3.0524850e-02, 3.7841335e-01, 7.6303922e-02, 1.0060174e-02,
        9.5659755e-02, 1.7763568e-15], dtype=float32),
 0.573726]

In [None]:
# Fit model
@tf.function
def nuts_sampler(init):
    """Draw samples with NUTS and dual-averaging step-size adaptation.

    Uses the module-level settings `init_step_size`, `nuts_burnin` and
    `nuts_samples`, with `censored_poisbinom_loglike` as the target
    log-probability.

    Args:
        init: initial state, a two-element list of state parts.

    Returns:
        A pair with the draws for the first and the second state part.
        `trace_fn=None` makes `sample_chain` return the states only (per the
        TFP documentation), so the second value holds draws of the second
        state part, not kernel statistics.
    """
    base_kernel = tfp.mcmc.NoUTurnSampler(
        target_log_prob_fn=censored_poisbinom_loglike,
        step_size=init_step_size,
    )

    # Accessors telling the adaptation wrapper where the NUTS kernel results
    # keep the step size and the log acceptance ratio.
    def _get_step_size(pkr):
        return pkr.step_size

    def _get_log_accept_prob(pkr):
        return pkr.log_accept_ratio

    def _set_step_size(pkr, new_step_size):
        return pkr._replace(step_size=new_step_size)

    adaptive_kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
        inner_kernel=base_kernel,
        num_adaptation_steps=nuts_burnin,
        step_size_getter_fn=_get_step_size,
        log_accept_prob_getter_fn=_get_log_accept_prob,
        step_size_setter_fn=_set_step_size,
    )

    first_part_draws, second_part_draws = tfp.mcmc.sample_chain(
        num_results=nuts_samples,
        current_state=init,
        kernel=adaptive_kernel,
        num_burnin_steps=nuts_burnin,
        parallel_iterations=10,
        trace_fn=None,
    )
    return first_part_draws, second_part_draws

# Run the sampler and report the wall-clock time it took.
# The two returned values are the draws for the two entries of `init`.
start = time.time()
samples_nuts, stats_nuts = nuts_sampler(init)
elapsed_seconds = time.time() - start
print(f"{elapsed_seconds:.02f} seconds elapsed")

In [None]:
# View results
# `stats_nuts` is used as the rho trace and `samples_nuts` as the theta trace.
trace_rho = stats_nuts.numpy()
trace_theta = samples_nuts.numpy()
# Per-parameter mean and standard deviation of the draws, theta first, rho last.
est_nuts = np.append(trace_theta.mean(axis=0), trace_rho.mean())
std_nuts = np.append(trace_theta.std(axis=0), trace_rho.std())
# assemble and print
true_values = np.concatenate([true_transmission_rate, base_rate])
param_labels = [f'theta_{i}' for i in range(1, P + 1)] + ['rho']
summary_nuts = pd.DataFrame(
    np.column_stack([est_nuts, std_nuts, true_values]),
    columns=['estimate', 'std err', 'true_val'],
    index=param_labels,
)
display(summary_nuts)