In [254]:
# load packages
import os
import warnings

import numpy as np
import pandas as pd
import scipy as sp
from scipy import optimize as opt
from scipy.integrate import odeint
from scipy.optimize import minimize

Now consider parameter learning for the SuEIR model. Given the model parameters $\boldsymbol{\theta}$ and the initial quantities $S_0, E_0, I_0$, and $R_0$, we can compute the number of individuals in each group (i.e., $S, E, I$, and $R$) at time $t$, denoted by $\widehat{S}_t, \widehat{E}_t, \widehat{I}_t$, and $\widehat{R}_t$, by applying a numerical ODE solver to the SuEIR system. We then learn the model parameters $\widehat{\boldsymbol{\theta}}=(\widehat{\beta}, \widehat{\sigma}, \widehat{\gamma}, \widehat{\mu})$ by minimizing the logarithmic-type mean square error (MSE) loss $L(\boldsymbol{\theta} ; \mathbf{I}, \mathbf{R})$ given below.

In the loss defined below, $\mathbf{I}=\left\{I_t\right\}_{t=1}^T$ and $\mathbf{R}=\left\{R_t\right\}_{t=1}^T$, where $I_t$ and $R_t$ denote the reported numbers of infected cases and removed cases (including both recovered cases and fatality cases) at time $t$ (i.e., date), and $p$ is the smoothing parameter used to ensure numerical stability. Note that, given $S_0, E_0, I_0$, and $R_0$, the quantities $\widehat{I}_t$ and $\widehat{R}_t$ are differentiable functions of the parameter $\boldsymbol{\theta}$. The model parameter $\widehat{\boldsymbol{\theta}}=\operatorname{argmin}_{\boldsymbol{\theta}} L(\boldsymbol{\theta} ; \mathbf{I}, \mathbf{R})$ can therefore be learned by applying a standard gradient-based optimizer (e.g., BFGS) to the loss function under the constraint that $\beta, \sigma, \gamma, \mu \in[0,1]$.

$$
L(\boldsymbol{\theta} ; \mathbf{I}, \mathbf{R})=\frac{1}{T} \sum_{t=1}^T\left[\left(\log \left(\widehat{I}_t+p\right)-\log \left(I_t+p\right)\right)^2+\left(\log \left(\widehat{R}_t+p\right)-\log \left(R_t+p\right)\right)^2\right].
$$


In [255]:
def loss(pred, target, smoothing=20):
    """Mean squared difference between the smoothed logarithms of two series.

    Parameters
    ----------
    pred : array-like
        Model output (e.g. the simulated I or R curve).
    target : array-like
        Reported values to compare against; combined element-wise with `pred`.
    smoothing : float, default 20
        Constant added to both inputs before taking the logarithm (the `p`
        of the log-type MSE), which keeps the logarithm away from zero counts.

    Returns
    -------
    float
        ``mean((log(pred + smoothing) - log(target + smoothing)) ** 2)``.
    """
    log_pred = np.log(pred + smoothing)
    log_target = np.log(target + smoothing)
    squared_gap = (log_pred - log_target) ** 2
    return np.mean(squared_gap)
    
# SEIR: the 'reported' SEIR. In our case, the simulated dataset. 
def optim(SEIR, method = 'L-BFGS-B', init= [0.05,0.05,0.05,0.05],
          y0=(800.0, 100.0, 50.0, 50.0), t=None, N=1000.0, bounds=None):
    """Estimate (beta, mu, sigma, gamma) by fitting the ODE model to reported I and R.

    The model integrated by ``odeint`` is::

        dS/dt = -beta * (E + I) * S / N
        dE/dt =  beta * (E + I) * S / N - sigma * E
        dI/dt =  mu * sigma * E - gamma * I
        dR/dt =  gamma * I

    and the objective is ``loss(I_hat, I) + loss(R_hat, R)``.

    Parameters
    ----------
    SEIR : pandas.DataFrame
        The 'reported' data (here, a simulated dataset). Must contain the
        columns "I" and "R", one row per time point.
    method : str, default 'L-BFGS-B'
        Solver name forwarded to ``scipy.optimize.minimize``.
    init : sequence of 4 floats
        Initial guess, ordered beta, mu, sigma, gamma. It is never mutated.
    y0 : sequence of 4 floats, default (800, 100, 50, 50)
        Initial state, ordered S, E, I, R.
    t : array-like or None
        Time grid with one entry per row of `SEIR`. ``None`` (default) uses
        ``0, 1, ..., len(SEIR) - 1``, which for 60 rows equals the former
        hard-coded ``np.linspace(0, 59, 60)``.
    N : float, default 1000.0
        Population size used to normalise the infection term.
    bounds : sequence of (low, high) pairs or None
        Box constraints ordered beta, mu, sigma, gamma. ``None`` uses the
        prior-knowledge bounds that were previously hard-coded.
        NOTE(review): per the scipy docs not every method accepts bounds
        (e.g. BFGS, CG) -- confirm before comparing such methods.

    Returns
    -------
    numpy.ndarray
        The optimiser's final point ``[beta, mu, sigma, gamma]``.

    Raises
    ------
    ValueError
        If `SEIR` has no rows, or `t` does not have one entry per row.

    Warns
    -----
    RuntimeWarning
        If the optimiser reports ``success == False``; the last iterate is
        still returned.
    """
    n_obs = len(SEIR)
    if n_obs == 0:
        raise ValueError("SEIR must contain at least one row of I and R observations")

    if t is None:
        # One time step per reported row, so prediction and data always line up.
        t = np.arange(n_obs, dtype=float)
    else:
        t = np.asarray(t, dtype=float)
        if t.shape != (n_obs,):
            raise ValueError(
                "t must have one entry per row of SEIR "
                "(got shape {} for {} rows)".format(t.shape, n_obs)
            )

    if bounds is None:
        # bounded based on prior knowledge: beta, mu, sigma, gamma
        bounds = [(0.0001, 0.3),  (0.001, 1.), (0.01, 1), (0.001, 0.3)]

    observed_I = SEIR.loc[:, "I"]
    observed_R = SEIR.loc[:, "R"]
    y0 = list(y0)

    def deriv(y, t, N, beta, mu, sigma, gamma):
        s,e,i,r= y
        dsdt = -((beta * (e + i) * s) / N)
        dedt = (((beta * (e + i) * s) / N) - sigma * e)
        didt = (mu * sigma * e - (gamma * i))
        drdt = (gamma * i)
        return dsdt, dedt, didt, drdt

    def objective(x):
        beta, mu, sigma, gamma = x
        est_SEIR = odeint(deriv, y0, t, args=(N, beta, mu, sigma, gamma))
        # Re-use the observations' index so pandas compares row k with row k;
        # a differing index would otherwise be aligned by label and the
        # resulting NaNs silently dropped from the mean.
        est_I = pd.Series(est_SEIR[:, 2], index=observed_I.index)
        est_R = pd.Series(est_SEIR[:, 3], index=observed_R.index)
        return loss(est_I, observed_I) + loss(est_R, observed_R)

    # scipy optimizer
    optimal = minimize(
        objective,
        init, # initial estimate
        method=method,
        bounds=bounds
    )

    if not optimal.success:
        warnings.warn(
            "optim: method {!r} did not converge: {}".format(method, optimal.message),
            RuntimeWarning,
        )

    return optimal.x

Load datasets and then minimize the objective function and obtain parameter estimates:

In [256]:
# NOTE(review): absolute, machine-specific path -- TODO make it relative to the repository.
os.chdir('c:\\Users\\chloe\\Desktop\\ChloeYou\\UBC_grad\\Term3\\MATH561\\math561-gp-SuEIR\\data\\python')

# os.listdir() returns entries in arbitrary order and may include non-CSV
# files, so keep only the CSVs and sort them for a reproducible column order.
csv_files = sorted(name for name in os.listdir() if name.lower().endswith('.csv'))

# One column per file, fitted with optim's defaults (L-BFGS-B, init 0.05).
params_trad = pd.DataFrame([])
for file in csv_files:
    df = pd.read_csv(file)
    params_trad[file] = optim(df)

params_trad # rows: beta, mu, sigma, gamma


Unnamed: 0,sim-1-p.csv,sim-10-p.csv,sim-2-p.csv,sim-3-p.csv,sim-4-p.csv,sim-5-p.csv,sim-6-p.csv,sim-7-p.csv,sim-8-p.csv,sim-9-p.csv
0,0.0001,0.055113,0.273961,0.061084,0.232099,0.085347,0.3,0.3,0.070909,0.055911
1,0.535861,0.973282,1.0,0.707803,0.134974,0.424747,0.113158,0.114356,0.557338,1.0
2,0.130974,0.037506,0.010096,0.049646,0.124357,0.072149,0.183636,0.193247,0.05558,0.036574
3,0.116957,0.120996,0.117069,0.120777,0.121667,0.120812,0.125316,0.126875,0.119958,0.120454


In [257]:
# NOTE(review): absolute, machine-specific path -- TODO make it relative to the repository.
os.chdir('c:\\Users\\chloe\\Desktop\\ChloeYou\\UBC_grad\\Term3\\MATH561\\math561-gp-SuEIR\\data\\matlab')

# os.listdir() order is arbitrary, so sort before dropping an entry by position.
entries = sorted(os.listdir())
# The first entry is the ground truth, so we will only calculate the R_0 from it.
# Assumes the ground-truth file sorts before the sim-*-m.csv files -- TODO confirm
# its name and exclude it explicitly.
csv = [name for name in entries[1:] if name.lower().endswith('.csv')]
print(csv)

params_nn = pd.DataFrame([])
for file in csv:
    df = pd.read_csv(file)
    params_nn[file] = optim(df)

params_nn # rows: beta, mu, sigma, gamma

['sim-1-m.csv', 'sim-10-m.csv', 'sim-2-m.csv', 'sim-3-m.csv', 'sim-4-m.csv', 'sim-5-m.csv', 'sim-6-m.csv', 'sim-7-m.csv', 'sim-8-m.csv', 'sim-9-m.csv']


Unnamed: 0,sim-1-m.csv,sim-10-m.csv,sim-2-m.csv,sim-3-m.csv,sim-4-m.csv,sim-5-m.csv,sim-6-m.csv,sim-7-m.csv,sim-8-m.csv,sim-9-m.csv
0,0.3,0.100043,0.3,0.299506,0.200063,0.100062,0.3,0.3,0.299867,0.200032
1,0.060383,0.099964,0.055187,0.050026,0.049987,0.049969,0.118021,0.108908,0.100018,0.099989
2,0.16341,0.090013,0.12995,0.090199,0.089968,0.090013,0.166598,0.130574,0.090045,0.089981
3,0.119317,0.120001,0.119582,0.120006,0.120001,0.119999,0.119263,0.119539,0.120002,0.120002


Calculate $R_0$ for each simulation:
$$
R_0 = \frac{\beta}{\sigma} +\frac{\beta\mu}{\gamma}
$$

In [258]:
# Rows of params_trad are beta, mu, sigma, gamma (one column per file).
beta_hat, mu_hat, sigma_hat, gamma_hat = (params_trad.loc[row] for row in range(4))
# R_0 = beta/sigma + beta*mu/gamma, evaluated per file.
R_0_trad = (beta_hat / sigma_hat) + (beta_hat * mu_hat / gamma_hat)
R_0_trad


sim-1-p.csv      0.001222
sim-10-p.csv     1.912777
sim-2-p.csv     29.474735
sim-3-p.csv      1.588375
sim-4-p.csv      2.123876
sim-5-p.csv      1.482987
sim-6-p.csv      1.904558
sim-7-p.csv      1.822815
sim-8-p.csv      1.605260
sim-9-p.csv      1.992881
dtype: float64

In [259]:
# Rows of params_nn are beta, mu, sigma, gamma (one column per file).
beta_hat, mu_hat, sigma_hat, gamma_hat = (params_nn.loc[row] for row in range(4))
# R_0 = beta/sigma + beta*mu/gamma, evaluated per file.
R_0_nn = (beta_hat / sigma_hat) + (beta_hat * mu_hat / gamma_hat)
R_0_nn

sim-1-m.csv     1.987690
sim-10-m.csv    1.194771
sim-2-m.csv     2.447027
sim-3-m.csv     3.445355
sim-4-m.csv     2.307048
sim-5-m.csv     1.153310
sim-6-m.csv     2.097619
sim-7-m.csv     2.570869
sim-8-m.csv     3.580129
sim-9-m.csv     2.389719
dtype: float64

Let's compare these estimated values to the 'true' $R_0 = \frac{0.1}{0.09} +\frac{0.1 \times 0.1}{0.12} \approx 1.19$.

Also, since the objective function is non-convex, the estimate is highly sensitive to
the initial parameter values used for optimization.

We look at the sum of the absolute deviations of the estimated $R_0$ values from the true value:

In [260]:
# Total absolute deviation of the estimates from the reference R_0 of 1.19.
sum((R_0_trad - 1.19).abs())

34.38704136850016

In [261]:

# Total absolute deviation of the estimates from the reference R_0 of 1.19.
sum((R_0_nn - 1.19).abs())

11.346918069655649

#### Testing different optimization algorithms and initial parameter estimates 

In [262]:
# Switch the working directory to the data\python folder; the cells that follow
# list and read their CSV files relative to os.getcwd().
# NOTE(review): absolute, machine-specific path -- TODO make it relative to the repository.
os.chdir('c:\\Users\\chloe\\Desktop\\ChloeYou\\UBC_grad\\Term3\\MATH561\\math561-gp-SuEIR\\data\\python')

# Disabled experiment: method='BFGS' with init=[0.1, 0.1, 0.1, 0.1].
# The trailing comment on the last line records the summed absolute R_0 error
# obtained for that run (about 326.17).
# NOTE(review): optim() always passes `bounds` to scipy's minimize; BFGS is not
# among the methods scipy documents as accepting bounds -- confirm whether the
# fit ran unconstrained.
# params_trad = pd.DataFrame([])
# for file in os.listdir(os.getcwd()) :
#     df = pd.read_csv(file)
#     params_trad[file] = optim(df, method='BFGS', init=[0.1,0.1,0.1,0.1])
    
# params_trad # rows: beta, mu, sigma, gamma

# R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])
# R_0_trad

# sum(abs(R_0_trad - 1.19)) # 326.1673000557157

In [263]:
# Disabled experiment: method='CG' with init=[0.1, 0.1, 0.1, 0.1].
# The trailing comment on the last line records the summed absolute R_0 error
# obtained for that run (about 1032.54).
# NOTE(review): optim() always passes `bounds` to scipy's minimize; CG is not
# among the methods scipy documents as accepting bounds -- confirm whether the
# fit ran unconstrained.
# params_trad = pd.DataFrame([])
# for file in os.listdir(os.getcwd()) :
#     df = pd.read_csv(file)
#     params_trad[file] = optim(df, method='CG', init=[0.1,0.1,0.1,0.1]) 
    
# params_trad # rows: beta, mu, sigma, gamma

# R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])
# R_0_trad

# sum(abs(R_0_trad - 1.19)) # 1032.5439092183417

In [264]:
# Powell from init=[0.05, 0.05, 0.05, 0.05] on every CSV in the current working
# directory (set by an earlier os.chdir cell).
# os.listdir() order is arbitrary and may include non-CSV entries, so filter
# and sort to keep the run reproducible.
csv_files = sorted(name for name in os.listdir() if name.lower().endswith('.csv'))

params_trad = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv_files:
    df = pd.read_csv(file)
    params_trad[file] = optim(df, method='Powell', init=[0.05,0.05,0.05,0.05])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])

# Only the last expression of a cell is displayed: total absolute deviation from 1.19.
sum(abs(R_0_trad - 1.19))

15.133294217610082

In [265]:
# Powell from init=[0.3, 0.5, 0.5, 0.3] on every CSV in the current working
# directory (set by an earlier os.chdir cell).
# os.listdir() order is arbitrary and may include non-CSV entries, so filter
# and sort to keep the run reproducible.
csv_files = sorted(name for name in os.listdir() if name.lower().endswith('.csv'))

params_trad = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv_files:
    df = pd.read_csv(file)
    params_trad[file] = optim(df, method='Powell', init=[0.3,0.5,0.5,0.3])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])

# Only the last expression of a cell is displayed: total absolute deviation from 1.19.
sum(abs(R_0_trad - 1.19))

107.04476795732802

In [None]:
# Powell from init=[0.15, 0.5, 0.5, 0.15] on every CSV in the current working
# directory (set by an earlier os.chdir cell).
# os.listdir() order is arbitrary and may include non-CSV entries, so filter
# and sort to keep the run reproducible.
csv_files = sorted(name for name in os.listdir() if name.lower().endswith('.csv'))

params_trad = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv_files:
    df = pd.read_csv(file)
    params_trad[file] = optim(df, method='Powell', init=[0.15,0.5,0.5,0.15])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])

# Only the last expression of a cell is displayed: total absolute deviation from 1.19.
sum(abs(R_0_trad - 1.19))

In [266]:
# Nelder-Mead from init=[0.05, 0.05, 0.05, 0.05] on every CSV in the current
# working directory (set by an earlier os.chdir cell).
# os.listdir() order is arbitrary and may include non-CSV entries, so filter
# and sort to keep the run reproducible.
csv_files = sorted(name for name in os.listdir() if name.lower().endswith('.csv'))

params_trad = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv_files:
    df = pd.read_csv(file)
    params_trad[file] = optim(df, method='Nelder-Mead', init=[0.05,0.05,0.05,0.05])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])

# Only the last expression of a cell is displayed: total absolute deviation from 1.19.
sum(abs(R_0_trad - 1.19))

146.59785552045662

In [267]:
# Nelder-Mead from init=[0.3, 0.5, 0.5, 0.3] on every CSV in the current
# working directory (set by an earlier os.chdir cell).
# os.listdir() order is arbitrary and may include non-CSV entries, so filter
# and sort to keep the run reproducible.
csv_files = sorted(name for name in os.listdir() if name.lower().endswith('.csv'))

params_trad = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv_files:
    df = pd.read_csv(file)
    params_trad[file] = optim(df, method='Nelder-Mead', init=[0.3,0.5,0.5,0.3])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])

# Only the last expression of a cell is displayed: total absolute deviation from 1.19.
sum(abs(R_0_trad - 1.19))

168.5150620370362

In [None]:
# Nelder-Mead from init=[0.15, 0.5, 0.5, 0.15] on every CSV in the current
# working directory (set by an earlier os.chdir cell).
# os.listdir() order is arbitrary and may include non-CSV entries, so filter
# and sort to keep the run reproducible.
csv_files = sorted(name for name in os.listdir() if name.lower().endswith('.csv'))

params_trad = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv_files:
    df = pd.read_csv(file)
    params_trad[file] = optim(df, method='Nelder-Mead', init=[0.15,0.5,0.5,0.15])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
R_0_trad = (params_trad.loc[0,]/params_trad.loc[2,]) + (params_trad.loc[0,]*params_trad.loc[1,]/params_trad.loc[3,])

# Only the last expression of a cell is displayed: total absolute deviation from 1.19.
sum(abs(R_0_trad - 1.19))

In [268]:
# NOTE(review): absolute, machine-specific path -- TODO make it relative to the repository.
os.chdir('c:\\Users\\chloe\\Desktop\\ChloeYou\\UBC_grad\\Term3\\MATH561\\math561-gp-SuEIR\\data\\matlab')

# os.listdir() order is arbitrary, so sort before dropping an entry by position.
entries = sorted(os.listdir())
# The first entry is the ground truth, so we will only calculate the R_0 from it.
# Assumes the ground-truth file sorts before the sim-*-m.csv files -- TODO confirm
# its name and exclude it explicitly.
csv = [name for name in entries[1:] if name.lower().endswith('.csv')]
print(csv)

# Powell from init=[0.05, 0.05, 0.05, 0.05] on every remaining file.
params_nn = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv:
    df = pd.read_csv(file)
    params_nn[file] = optim(df,method='Powell', init=[0.05,0.05,0.05,0.05])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
R_0_nn = (params_nn.loc[0,]/params_nn.loc[2,]) + (params_nn.loc[0,]*params_nn.loc[1,]/params_nn.loc[3,])

# Only the last expression of a cell is displayed: total absolute deviation from 1.19.
sum(abs(R_0_nn - 1.19))

['sim-1-m.csv', 'sim-10-m.csv', 'sim-2-m.csv', 'sim-3-m.csv', 'sim-4-m.csv', 'sim-5-m.csv', 'sim-6-m.csv', 'sim-7-m.csv', 'sim-8-m.csv', 'sim-9-m.csv']


11.629456041132238

In [269]:
# Powell from init=[0.3, 0.5, 0.5, 0.3] on the files listed in `csv`
# (read relative to the current working directory).
params_nn = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv:
    df = pd.read_csv(file)
    params_nn[file] = optim(df, method='Powell', init=[0.3, 0.5, 0.5, 0.3])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
beta_hat, mu_hat, sigma_hat, gamma_hat = (params_nn.loc[row] for row in range(4))
R_0_nn = (beta_hat / sigma_hat) + (beta_hat * mu_hat / gamma_hat)

# Total absolute deviation from the reference R_0 of 1.19.
sum((R_0_nn - 1.19).abs())

9.74589373983309

In [None]:
# Powell from init=[0.15, 0.5, 0.5, 0.15] on the files listed in `csv`
# (read relative to the current working directory).
params_nn = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv:
    df = pd.read_csv(file)
    params_nn[file] = optim(df, method='Powell', init=[0.15, 0.5, 0.5, 0.15])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
beta_hat, mu_hat, sigma_hat, gamma_hat = (params_nn.loc[row] for row in range(4))
R_0_nn = (beta_hat / sigma_hat) + (beta_hat * mu_hat / gamma_hat)

# Total absolute deviation from the reference R_0 of 1.19.
sum((R_0_nn - 1.19).abs())

In [270]:
# Nelder-Mead from init=[0.05, 0.05, 0.05, 0.05] on the files listed in `csv`
# (read relative to the current working directory).
params_nn = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv:
    df = pd.read_csv(file)
    params_nn[file] = optim(df, method='Nelder-Mead', init=[0.05, 0.05, 0.05, 0.05])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
beta_hat, mu_hat, sigma_hat, gamma_hat = (params_nn.loc[row] for row in range(4))
R_0_nn = (beta_hat / sigma_hat) + (beta_hat * mu_hat / gamma_hat)

# Total absolute deviation from the reference R_0 of 1.19.
sum((R_0_nn - 1.19).abs())

11.662736520197909

In [271]:
# Nelder-Mead from init=[0.3, 0.5, 0.5, 0.3] on the files listed in `csv`
# (read relative to the current working directory).
params_nn = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv:
    df = pd.read_csv(file)
    params_nn[file] = optim(df, method='Nelder-Mead', init=[0.3, 0.5, 0.5, 0.3])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
beta_hat, mu_hat, sigma_hat, gamma_hat = (params_nn.loc[row] for row in range(4))
R_0_nn = (beta_hat / sigma_hat) + (beta_hat * mu_hat / gamma_hat)

# Total absolute deviation from the reference R_0 of 1.19.
sum((R_0_nn - 1.19).abs())

44.38654913936948

In [None]:
# Nelder-Mead from init=[0.15, 0.5, 0.5, 0.15] on the files listed in `csv`
# (read relative to the current working directory).
params_nn = pd.DataFrame([])  # rows: beta, mu, sigma, gamma
for file in csv:
    df = pd.read_csv(file)
    params_nn[file] = optim(df, method='Nelder-Mead', init=[0.15, 0.5, 0.5, 0.15])

# R_0 = beta/sigma + beta*mu/gamma, one value per file.
beta_hat, mu_hat, sigma_hat, gamma_hat = (params_nn.loc[row] for row in range(4))
R_0_nn = (beta_hat / sigma_hat) + (beta_hat * mu_hat / gamma_hat)

# Total absolute deviation from the reference R_0 of 1.19.
sum((R_0_nn - 1.19).abs())