In [1]:
import time

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp

In [2]:
tfd = tfp.distributions

In [3]:
# Confirm GPU in use
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# Parameters
P = 10
N = 10 ** 4
SEED = 1729

In [5]:
# Ground truth
np.random.seed(SEED)
true_transmission_rate = np.random.beta(2, 10, P)
true_occurrence_rate = np.random.beta(2, 10, P)
base_rate = np.random.beta(2, 10, 1)

In [6]:
# Simulate data
data = {}
for p in range(P):
    occurrence = np.random.binomial(1, true_occurrence_rate[p], N)
    transmission = occurrence * np.random.binomial(1, true_transmission_rate[p], N)
    data[f'O{p+1}'] = occurrence
    data[f'T{p+1}'] = transmission
data['T0'] = np.random.binomial(1, base_rate, N)
X = pd.DataFrame(data)

z = X.loc[:, X.columns.str.startswith('T')].sum(axis=1)
X = X.loc[:, X.columns.str.startswith('O')]
y = (z > 0).astype(int)

In [7]:
# Convert to tensors
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.float32)

In [8]:
# Define log-likelihood
@tf.function
def censored_poisbinom_loglike(theta, rho):
    if tf.math.reduce_any(tf.math.logical_or(theta <= 0., theta >= 1.)):
        return -np.inf
    if tf.math.logical_or(rho <= 0., rho >= 1.):
        return -np.inf
    log1m_theta = tf.math.log(1-theta)
    grid = tf.reshape(tf.tile(log1m_theta, (N,)), (N, P))
    s = tf.math.reduce_sum(tf.math.multiply(X, grid) + tf.math.log(1-rho), 1)
    s_0 = s[y == 0]
    s_1 = s[y == 1]
    ll = tf.math.reduce_sum(s_0) + tf.math.reduce_sum(tfp.math.log1mexp(s_1))
    return ll

In [9]:
# Define negative log-likelihood and use AD to compute gradients
@tf.function
def censored_poisbinom_negloglike(params):
    theta, rho = tf.split(params, [P, 1], axis=0)
    # need to take these back down to vectors and scalars:
    theta = tf.reshape(theta,(P,))
    rho = tf.reshape(rho,())
    return -1 * censored_poisbinom_loglike(theta, rho)

@tf.function
def censored_poisbinom_negloglike_and_grad(params):
    return tfp.math.value_and_gradient(
        censored_poisbinom_negloglike, 
        params
    )

In [10]:
# Test AD
test_val = tf.fill(P + 1, 0.5)
out = censored_poisbinom_negloglike_and_grad(test_val)
print("Function value: ", out[0].numpy())
print("Gradients: ", out[1].numpy())

Function value:  48682.43
Gradients:  [   633.8817   1761.6609    701.8873   4299.384    2239.622    2575.1855
    905.8743   1549.767     947.8628   1465.8041 123353.78  ]


In [11]:
# Approximate MLE using gradient descent
start = tf.fill(P + 1, 0.5)

optim_results = tfp.optimizer.bfgs_minimize(
    censored_poisbinom_negloglike_and_grad, start, tolerance=1e-8
)

est_params = optim_results.position.numpy()
est_serr = np.sqrt(np.diagonal(optim_results.inverse_hessian_estimate.numpy()))
display(pd.DataFrame(
    np.c_[est_params, est_serr, np.concatenate([true_transmission_rate, base_rate])],
    columns=['estimate', 'std err', 'true_val'],
    index=[f'theta_{i}' for i in range(1, P + 1)] + ['rho']
))

Unnamed: 0,estimate,std err,true_val
theta_1,0.05997945,0.855493,0.11229
theta_2,0.1597942,0.398355,0.21544
theta_3,4.440892e-16,0.814028,0.134004
theta_4,0.1957998,0.020067,0.03496
theta_5,0.2108718,0.161463,0.14024
theta_6,0.4165927,0.028979,0.389962
theta_7,0.03561807,0.86557,0.061519
theta_8,0.1209561,0.736039,0.096669
theta_9,0.07682238,0.890446,0.08617
theta_10,0.1104139,0.682468,0.052647


In [12]:
# Set model parameters
nuts_samples = 20000
nuts_burnin = 10000
chains = 16
init_step_size=.3
init = [est_params[:P], est_params[-1]]

In [13]:
# Fit model
@tf.function
def nuts_sampler(init):
    nuts_kernel = tfp.mcmc.NoUTurnSampler(
        target_log_prob_fn=censored_poisbinom_loglike, 
        step_size=init_step_size,
    )
    adapt_nuts_kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
        inner_kernel=nuts_kernel,
        num_adaptation_steps=nuts_burnin,
        step_size_getter_fn=lambda pkr: pkr.step_size,
        log_accept_prob_getter_fn=lambda pkr: pkr.log_accept_ratio,
        step_size_setter_fn=lambda pkr, new_step_size: pkr._replace(step_size=new_step_size)
    )

    samples_nuts_, stats_nuts_ = tfp.mcmc.sample_chain(
        num_results=nuts_samples,
        current_state=init,
        kernel=adapt_nuts_kernel,
        num_burnin_steps=100,
        parallel_iterations=5,
        trace_fn=None
    )
    return samples_nuts_, stats_nuts_

start = time.time()
samples_nuts, stats_nuts = nuts_sampler(init)
print(f"{time.time() - start:.02f} seconds elapsed")

346.35 seconds elapsed


In [14]:
# View results
trace_rho = samples_nuts[1].numpy()
trace_theta = samples_nuts[0].numpy()
est_nuts = np.r_[trace_theta.mean(axis=0), trace_rho.mean()]
std_nuts = np.r_[trace_theta.std(axis=0), trace_rho.std()]
# assemble and print
display(pd.DataFrame(
    np.c_[est_nuts, std_nuts, np.concatenate([true_transmission_rate, base_rate])],
    columns=['estimate', 'std err', 'true_val'],
    index=[f'theta_{i}' for i in range(1, P + 1)] + ['rho']
))

Unnamed: 0,estimate,std err,true_val
theta_1,0.176921,0.027117,0.188545
theta_2,0.094382,0.022881,0.077791
theta_3,0.06735,0.027811,0.067408
theta_4,0.030377,0.018024,0.052992
theta_5,0.404156,0.016261,0.377972
theta_6,0.2171,0.028435,0.240681
theta_7,0.121046,0.017849,0.139686
theta_8,0.042036,0.017117,0.050818
theta_9,0.012239,0.011494,0.033721
theta_10,0.180073,0.023018,0.181393
