In [68]:
# load packages (standard library first, then third-party)
import os

import numpy as np
import scipy as sp
import pandas as pd
from scipy import optimize as opt
from scipy.integrate import odeint
from scipy.optimize import minimize

# Display the kernel's working directory. `os` must be imported above,
# otherwise this line raises NameError on a fresh kernel.
os.getcwd()


'c:\\Users\\chloe\\Desktop\\ChloeYou\\UBC_grad\\Term3\\MATH561\\math561-gp-SuEIR\\code'

Now consider parameter learning for the SuEIR model. Given the model parameters $\boldsymbol{\theta}$ and initial quantities $S_0, E_0, I_0$, and $R_0$, we can compute the number of individuals in each group (i.e., $S, E, I$, and $R$) at time $t$, denoted by $\widehat{S}_t, \widehat{E}_t, \widehat{I}_t$, and $\widehat{R}_t$, by applying a numerical ODE solver to the SuEIR system of ODEs. We then learn the model parameters $\widehat{\boldsymbol{\theta}}=(\widehat{\beta}, \widehat{\sigma}, \widehat{\gamma}, \widehat{\mu})$ by minimizing the following logarithmic-type mean squared error (MSE):
$$
L(\boldsymbol{\theta} ; \mathbf{I}, \mathbf{R})=\frac{1}{T} \sum_{t=1}^T\left[\left(\log \left(\widehat{I}_t+p\right)-\log \left(I_t+p\right)\right)^2+\left(\log \left(\widehat{R}_t+p\right)-\log \left(R_t+p\right)\right)^2\right],
$$
where $\mathbf{I}=\left\{I_t\right\}_{t=1}^T$ and $\mathbf{R}=\left\{R_t\right\}_{t=1}^T$, $I_t$ and $R_t$ denote the reported numbers of infected cases and removed cases (the latter including both recovered cases and fatalities) at time $t$ (i.e., date), and $p$ is the smoothing parameter used to ensure numerical stability (the `smoothing` argument of `loss` below). Note that, given $S_0, E_0, I_0$, and $R_0$, the quantities $\widehat{I}_t$ and $\widehat{R}_t$ are differentiable functions of the parameter $\boldsymbol{\theta}$. The model parameter $\widehat{\boldsymbol{\theta}}=\operatorname{argmin}_{\boldsymbol{\theta}} L(\boldsymbol{\theta} ; \mathbf{I}, \mathbf{R})$ can therefore be learned by applying a standard gradient-based optimizer (e.g., BFGS) to the loss function under the constraint that $\beta, \sigma, \gamma, \mu \in[0,1]$. (The implementation below instead uses SciPy's derivative-free Nelder-Mead method with tighter per-parameter bounds.)

In [98]:
def loss(pred, target, smoothing=20):
    """Mean squared difference between the smoothed logs of `pred` and `target`.

    `smoothing` is added to both inputs before taking the logarithm (the
    parameter p in the formula above). The squared log-differences are then
    averaged with `np.mean`.
    """
    log_residual = np.log(pred + smoothing) - np.log(target + smoothing)
    return np.mean(log_residual ** 2)


$$
L(\boldsymbol{\theta} ; \mathbf{I}, \mathbf{R})=\frac{1}{T} \sum_{t=1}^T\left[\left(\log \left(\widehat{I}_t+p\right)-\log \left(I_t+p\right)\right)^2+\left(\log \left(\widehat{R}_t+p\right)-\log \left(R_t+p\right)\right)^2\right],
$$


In [99]:
# SEIR: the 'reported' SEIR. In our case, the simulated dataset.
def optim(SEIR, y0=(800.0, 100.0, 50.0, 50.0), t=None, N=1000.0):
    """Estimate the SuEIR parameters (beta, mu, sigma, gamma) from reported data.

    The SuEIR ODE system is integrated with `odeint` for each candidate
    parameter vector, and the sum of `loss` on the I and R trajectories is
    minimized with bounded Nelder-Mead.

    Parameters
    ----------
    SEIR : pandas.DataFrame
        Reported data; must contain the columns "I" and "R". Rows are matched
        to time points by position (row k <-> t[k]), not by index label.
    y0 : sequence of 4 floats, optional
        Initial state (S0, E0, I0, R0). Defaults to (800, 100, 50, 50).
    t : 1-D array-like, optional
        Time grid, one entry per row of `SEIR`. Defaults to 0, 1, ..., n-1
        where n is the number of rows (for 60 rows this is the grid
        np.linspace(0, 59, 60) that used to be hard-coded).
    N : float, optional
        Population size used to normalise the infection term. Defaults to 1000.

    Returns
    -------
    numpy.ndarray
        Fitted parameters in the order (beta, mu, sigma, gamma).

    Raises
    ------
    ValueError
        If `y0` does not have exactly 4 entries, if the data is empty, or if
        `t` is not a 1-D grid with one entry per data row.
    """
    import warnings  # local import: only needed for the convergence warning

    # Work on plain arrays so predictions and observations are compared
    # position-by-position; pandas index alignment would otherwise turn a
    # length mismatch into NaNs that the mean silently skips.
    observed_i = SEIR.loc[:, "I"].to_numpy(dtype=float)
    observed_r = SEIR.loc[:, "R"].to_numpy(dtype=float)
    n_obs = len(observed_i)
    if n_obs == 0:
        raise ValueError("SEIR contains no rows to fit against")

    if t is None:
        t = np.arange(n_obs, dtype=float)
    else:
        t = np.asarray(t, dtype=float)
    if t.ndim != 1 or len(t) != n_obs:
        raise ValueError(
            f"t must be 1-D with one entry per data row: "
            f"got shape {t.shape} for {n_obs} rows"
        )

    y0 = np.asarray(y0, dtype=float)
    if y0.shape != (4,):
        raise ValueError(f"y0 must hold (S0, E0, I0, R0); got shape {y0.shape}")

    def deriv(y, t, N, beta, mu, sigma, gamma):
        """Right-hand side of the SuEIR system for state y = (S, E, I, R)."""
        s, e, i, r = y
        # Both exposed and infected individuals transmit the disease.
        new_exposed = beta * (e + i) * s / N
        dsdt = -new_exposed
        dedt = new_exposed - sigma * e
        # Only a fraction mu of those leaving E enters the reported I group.
        didt = mu * sigma * e - gamma * i
        drdt = gamma * i
        return dsdt, dedt, didt, drdt

    def objective(x):
        """Log-type MSE of the simulated I and R against the reported ones."""
        beta, mu, sigma, gamma = x
        est_SEIR = odeint(deriv, y0, t, args=(N, beta, mu, sigma, gamma))
        # Columns follow the order of y0: S, E, I, R.
        return loss(est_SEIR[:, 2], observed_i) + loss(est_SEIR[:, 3], observed_r)

    # scipy optimizer
    # NOTE(review): bounds with Nelder-Mead need a recent SciPy (1.7+, I
    # believe) -- confirm the minimum version for this project.
    optimal = minimize(
        objective,
        [0.2, .5e-2, 2.5e-1, 0.01],  # initial estimate: beta, mu, sigma, gamma
        method='Nelder-Mead',
        bounds=[(0.0001, .3), (0.001, 0.3), (0.01, 1), (0.001, 1.)]
    )

    if not optimal.success:
        # Still return the best point found, but do not hide the failure.
        warnings.warn(
            f"SuEIR parameter fit did not converge: {optimal.message}",
            RuntimeWarning,
            stacklevel=2,
        )

    return optimal.x

Load datasets and then minimize the objective function and obtain parameter estimates:

In [100]:
# TODO automate reading in simulation data
# Path is relative to the notebook's working directory (the repository's
# `code/` folder, as shown by the os.getcwd() output at the top) so the
# notebook is not tied to one user's machine.
SEIR = pd.read_csv('../data/python/sim-1-p.csv')
optim(SEIR) # beta, mu, sigma, gamma

array([0.3       , 0.001     , 0.15973786, 0.07900593])

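Calculate the basic reproduction number $\mathcal{R}_0$ (not to be confused with the initial removed count $R_0$ above) for each simulation: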