In [None]:
%%time
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import theano
from pymc3.ode import DifferentialEquation
from scipy.integrate import odeint
# Use the seaborn dark-grid matplotlib style for the plots drawn below.
plt.style.use("seaborn-darkgrid")
# Fixed seed so the injected noise and the perturbed prior locations are
# reproducible from run to run.
rng = np.random.default_rng(42)

# Sigma of the lognormal noise injected into the ODE-solved values.
noise = 0.3
# Number of tuning steps before we start keeping good data.
tune = 1000
# Kaggle kernels have 4 cores, so each one can run a chain in parallel.
cores = 4
# Relative half-width used to push the prior locations away from the true
# values: priors should be centered near, but not right at, the truth.
eps = 0.5

# True SIR parameters (p.129).
theta = dict(beta=0.00237, alpha=0.465)

# p.126
def SIR(y, t, p):
    """Right-hand side of the two-state SIR system.

    Parameters
    ----------
    y : indexable
        Current state; ``y[0]`` is S and ``y[1]`` is I.
    t : scalar
        Time. Unused, but part of the ``func(y, t, p)`` call signature.
    p : indexable
        Parameters; ``p[0]`` is the infection rate (beta) and ``p[1]`` the
        removal rate (alpha).

    Returns
    -------
    list
        ``[dS/dt, dI/dt]``.
    """
    # Index access (rather than tuple unpacking) keeps this usable with
    # symbolic vectors as well as plain sequences and arrays.
    susceptible, infected = y[0], y[1]
    beta, alpha = p[0], p[1]

    new_infections = beta * susceptible * infected
    removals = alpha * infected
    return [-new_infections, new_infections - removals]
# Observation grid: the integers 1 through 14.
times = np.arange(1, 15)
# Initial state [S, I] at times[0].
y0 = [760, 3]

# Run ODE with the true parameters.
y = odeint(SIR, y0, times, args=(list(theta.values()),), rtol=1e-8)

# Normalize by the total of the initial state and adjust the parameters
# accordingly: alpha does not change, but beta must be multiplied by n.
n = np.sum(y[0])
y_n = np.transpose(y) / n
theta_n = {**theta, 'beta': theta['beta'] * n}

def bayesiate(start=1, trace_dir='/kaggle/working/disease'):
    """Fit the SIR parameters to the observations from index ``start`` onward.

    Plots the normalized ODE solution together with the observations,
    samples the posterior of the SIR parameters and the observation noise
    with PyMC3, saves the trace to disk, plots the posterior, and displays a
    summary table of the sampled parameters next to their prior locations
    and true (normalized) values.

    Parameters
    ----------
    start : int, optional
        Index into ``times`` of the first observation handed to the model.
        The initial condition is always taken at ``times[0]``.
    trace_dir : str, optional
        Base path for the saved trace. The trace is written to
        ``"<trace_dir>_start<start>"`` so that runs with different ``start``
        values do not collide, and an existing directory of that name is
        overwritten so the cell can be re-run.

    Returns
    -------
    None
    """
    # ODE solution as solid lines.
    plt.plot(times, y_n[0], 'r')
    plt.plot(times, y_n[1], 'b')

    z = y_n.copy()
    # Replace I class with observations in Table 6.1 p.126
    z[1, 2:] = np.array([25, 75, 227, 296, 258, 236, 192, 126, 71, 28, 11, 7]) / n
    # Add noise to S class predictions from odeint
    z[0] = rng.lognormal(mean=np.log(z[0]), sigma=noise)

    # Shift to start at the specified index; t0 stays at the first time point
    # because that is where the initial condition is defined.
    t0 = times[0]
    t = times[start:]
    z = z[:, start:]

    # Observations as dots on top of the ODE curves.
    plt.plot(t, z[0], 'r.')
    plt.plot(t, z[1], 'b.')
    plt.show()

    # Guess loc for priors, but inject some error so Bayes doesn't get to
    # start at the correct answer.
    theta_loc = {key: val * rng.uniform(1 - eps, 1 + eps) for key, val in theta_n.items()}

    # Create pymc3 ODE object.
    sir_model = DifferentialEquation(
        func=SIR,
        times=t,
        t0=t0,
        n_states=len(y0),
        n_theta=len(theta),
    )

    # pm.save_trace refuses to write into an existing directory unless
    # overwrite=True; a fixed path therefore fails on the second call (and on
    # any re-run). Use one directory per start index and allow overwriting.
    save_dir = '{}_start{}'.format(trace_dir, start)

    with pm.Model():
        sigma_prior = pm.HalfCauchy("sigma", 1.0, shape=len(y0))
        theta_prior = [pm.Lognormal(key, pm.math.log(val), 1) for key, val in theta_loc.items()]
        sir_curves_n = sir_model(y0=y_n.T[0], theta=theta_prior)
        pm.Lognormal("Z", mu=pm.math.log(sir_curves_n), sigma=sigma_prior, observed=z.T)
        trace = pm.sample(tune=tune, draws=4*tune, cores=cores, chains=cores, target_accept=0.9)
        pm.save_trace(trace, save_dir, overwrite=True)
        data = az.from_pymc3(trace=trace)
    az.plot_posterior(data, round_to=2, hdi_prob=0.95)

    # Summary of the sampled parameters next to the prior locations and the
    # true values. The dict views are materialized as lists so the column
    # assignment does not depend on how pandas treats dict_values objects.
    theta_df = pd.DataFrame({key: trace.get_values(key) for key in theta})
    theta_stats = theta_df.describe().T
    theta_stats['prior_loc'] = list(theta_loc.values())
    theta_stats['true'] = list(theta_n.values())
    display(theta_stats)
    
# Run the fit twice: observations starting at index 1, then at index 2.
for first_obs_index in (1, 2):
    bayesiate(first_obs_index)