In [None]:
%config InlineBackend.figure_format = "retina"
import numpy as np
import matplotlib.pyplot as plt
import scipy
import scipy.integrate
import scipy.optimize
import scipy.stats as st

# Default font size applied to the text of every figure in this notebook
plt.rcParams["font.size"] = 14

# Forward probability

## Example 1

**Q**: Suppose the incubation period for a viral disease follows an exponential distribution with scale (mean) $\beta = 7$ days. A patient is then infected with the virus. What is the probability that s/he will show symptoms within the first 3 days?

**A**: The exponential density is given by:

$$p(y\vert\beta) = \frac{1}{\beta}\exp(-\frac{y}{\beta}) $$

To answer the question, we need to calculate $Pr(y\leq 3 \vert \beta = 7)$, which is just the cumulative distribution evaluated at $y=3$:

$$ \int_0^3 \frac{1}{7} \exp\left(-\frac{y}{7}\right) \mathrm{d}y $$


In [None]:
# NOTE: despite its name, this value is used below as the scale (mean) parameter
# `beta` of the exponential distribution, not as a rate (1 / mean).
rate = 7

In [None]:
def exponential_density(y, beta):
    """Evaluate the exponential density p(y | beta) = exp(-y / beta) / beta.

    Parameters
    ----------
    y : float or numpy.ndarray
        Point(s) at which the density is evaluated.
    beta : float or numpy.ndarray
        Scale parameter(s) of the distribution.

    Returns
    -------
    float or numpy.ndarray
        Density value(s), following NumPy broadcasting of ``y`` and ``beta``.
    """
    decay = np.exp(-y / beta)
    return decay / beta

In [None]:
# Integrate the density numerically over the first 3 days (quadrature)
result, error = scipy.integrate.quad(
    exponential_density,
    a=0,
    b=3,
    args=(rate,),
)
print(f"Result: {result:0.5f}")

In [None]:
# Alternatively, evaluate the CDF that scipy.stats already provides
incubation_dist = st.expon(scale=rate)
result = incubation_dist.cdf(3)
print(f"Result: {result:0.5f}")

## Example 2

Suppose the incubation period for a viral disease follows an exponential distribution with scale (mean) 7 days. Now 10 patients are infected with the virus at the same time. How long should we wait for all patients to show symptoms? Provide a 95% confidence interval.

**Answer**: This question would require us to do some more complicated integrals. We won't bother. Instead, we solve it via data simulations:

In [None]:
n_patients = 10  # number of patients infected at the same time
incubation_period = 7  # used as the scale of the exponential draws in the simulation
n_simulations = 10_000 # If we want higher precision, we increase the number of iterations

In [None]:
# Simulate all cohorts in a single vectorized draw: each row is one simulated
# cohort and each column is one patient's incubation time.
# A seeded generator keeps the figures and numbers below reproducible when the
# notebook is restarted and re-run from the top.
rng = np.random.default_rng(seed=0)
times = rng.exponential(incubation_period, size=(n_simulations, n_patients))

# We have to wait for the slowest patient of each cohort, i.e. the row-wise maximum.
results = times.max(axis=1)

In [None]:
# Normalised histogram of the simulated waiting times
fig, ax = plt.subplots()
ax.hist(results, bins=30, density=True)
ax.set_xlabel("Days")
plt.show()

In [None]:
# Summarise the simulated waiting times: sample mean and central 95% interval
# (2.5% and 97.5% empirical quantiles).
mean = np.mean(results)
low, high = np.quantile(results, [0.025, 0.975])

# Reuse `mean` instead of recomputing np.mean(results) inside the f-string.
print(f"Expected:     {mean:2.1f} days")
print(f"95% interval: {low:2.1f} - {high:2.1f} days")

# Inverse probability

Suppose 10 patients are infected with a given virus at the same time. They start showing symptoms some time later (see table below). What is the incubation period of this virus?

In [None]:
# Observed time between infection and symptom onset for each of the 10 patients
# (units are presumably days, as in the examples above — TODO confirm)
ys = [9.3, 3.0, 0.7, 11.3, 3.3, 2.3, 14.9, 0.2, 29.5, 16.2]

In [None]:
# consider first a single observation, say y=9.3, what does this say about the incubation period?

In [None]:
# Likelihood of the single observation y = 9.3 under beta = 1, 10 and 20
candidate_betas = np.array([1, 10, 20])
exponential_density(9.3, beta=candidate_betas)

In [None]:
# Likelihood of the single observation y = 3.0 under beta = 1, 10 and 20
candidate_betas = np.array([1, 10, 20])
exponential_density(3, beta=candidate_betas)

In [None]:
# Likelihood of the single observation y = 14.9 under beta = 1, 10 and 20
candidate_betas = np.array([1, 10, 20])
exponential_density(14.9, beta=candidate_betas)

In [None]:
# Let's plot the likelihoods over a fine grid of beta values so that we can
# understand their behaviour.

beta_space = np.linspace(0.1, 40, num=300)

fig, ax = plt.subplots()
for y in (9.3, 3.0, 14.9):  # selected patients
    ax.plot(beta_space, exponential_density(y, beta_space), label=f"y={y:2.1f}")
ax.set_xlabel("Beta")
ax.set_ylabel("Likelihood")
ax.legend()
plt.show()

In [None]:
# Assuming that the observations are conditionally independent, we can compute the likelihood of all our data for any value of beta
# by simply multiplying the individual likelihoods

In [None]:
def exponential_likelihood(y, beta):
    """Joint likelihood of the observations ``y`` under an exponential model.

    Computes ``exp(-sum(y) / beta) / beta**n``, i.e. the product of the
    individual exponential densities of the ``n`` observations.

    Parameters
    ----------
    y : float or sequence of float
        Observation(s); a scalar is treated as a single observation.
    beta : float or numpy.ndarray
        Scale parameter(s) at which the likelihood is evaluated.

    Returns
    -------
    float or numpy.ndarray
        Likelihood value for each entry of ``beta``.
    """
    observations = np.atleast_1d(y)  # lets a scalar act as a one-element sample
    total_time = sum(observations)
    return np.exp(-(1/beta) * total_time) / beta ** len(observations)

In [None]:
# Example: joint likelihood of all observations for beta = 3
exponential_likelihood(y=ys, beta=3)

In [None]:
# Example: joint likelihood of all observations for beta = 10
exponential_likelihood(y=ys, beta=10)

In [None]:
# now do it for many values of beta from 0.1 to 40 (the grid `beta_space` defined above)

In [None]:
# Joint likelihood of all observations at every beta on the grid
likelihoods = exponential_likelihood(y=ys, beta=beta_space)

In [None]:
# Joint likelihood as a function of beta
fig, ax = plt.subplots()
ax.plot(beta_space, likelihoods)
ax.set_xlabel("beta")
ax.set_ylabel("likelihood")
plt.show()

In [None]:
# Because of the incredibly small scale on the y axis,
# it is often better to work with the logarithm of the likelihood.
# This does not affect the position of the maximum because the log is a monotonically increasing function.

In [None]:
def exponential_log_likelihood(ys, beta):
    """Joint log-likelihood of the observations ``ys`` under an exponential model.

    Computes ``-sum(ys) / beta - n * log(beta)``, the logarithm of the product
    of the ``n`` individual exponential densities.

    Parameters
    ----------
    ys : float or sequence of float
        Observation(s); a scalar is treated as a single observation.
    beta : float or numpy.ndarray
        Scale parameter(s) at which the log-likelihood is evaluated.

    Returns
    -------
    float or numpy.ndarray
        Log-likelihood value for each entry of ``beta``.
    """
    # Cast once and reuse the array for both the count and the sum. Previously
    # only len() saw the cast, so a scalar `ys` crashed in sum().
    ys = np.atleast_1d(ys)
    n = len(ys)
    log_num = -(1/beta) * np.sum(ys)
    log_den = n * np.log(beta)
    return log_num - log_den

In [None]:
# Joint log-likelihood of all observations at every beta on the grid
log_likelihoods = exponential_log_likelihood(ys=ys, beta=beta_space)

In [None]:
# Log-likelihood as a function of beta
fig, ax = plt.subplots()
ax.plot(beta_space, log_likelihoods)
ax.set_xlabel("beta")
ax.set_ylabel("log-likelihood")
plt.show()

In [None]:
# Zoom in on grid points 40..139 to better resolve what is happening
zoom = slice(40, 140)
fig, ax = plt.subplots()
ax.plot(beta_space[zoom], log_likelihoods[zoom])
ax.set_xlabel("beta")
ax.set_ylabel("log-likelihood")
plt.show()

In [None]:
# Find the maximum-likelihood estimate (MLE): maximising the log-likelihood
# is the same as minimising its negative, which is what the optimizer does.
def negative_log_likelihood(beta):
    """Objective for the optimizer: minus the log-likelihood of `ys` at `beta`."""
    return -exponential_log_likelihood(ys, beta)


mle = scipy.optimize.minimize_scalar(
    negative_log_likelihood,
    method='bounded',
    bounds=(0, 40),
)

In [None]:
# Print the optimizer result; its "x" and "fun" entries are used in the plot below
print(mle)

In [None]:
# Zoomed log-likelihood curve with the optimizer's solution marked on it
zoom = slice(40, 140)
fig, ax = plt.subplots()
ax.plot(beta_space[zoom], log_likelihoods[zoom])
# mle["fun"] is the minimised *negative* log-likelihood, so flip its sign back
ax.plot(mle["x"], -mle["fun"], marker='o', color='r', lw=0, label='MLE')
ax.set_xlabel("beta")
ax.set_ylabel("log-likelihood")
ax.legend()
plt.show()