In [1]:
from scipy.stats import truncnorm
import pandas as pd
import numpy as np
import itertools
import datetime
import tqdm
import sys
import os

def flatten_list(list_array):
    return list(itertools.chain(*list_array))

sys.path.insert(0,"../")
from global_config import config

results_dir           = config.get_property('results_dir')
data_dir              = config.get_property('data_dir')
paper_dir             = config.get_property('paper_dir')
data_db_dir           = config.get_property('data_db_dir')
feb_hosp_records_path = os.path.join(data_db_dir, 'long_files_8_25_2021')
path_to_save          = os.path.join(results_dir, "real_testing", "community")

COLOR_LIST1           = ["#F8AFA8", "#FDDDA0", "#F5CDB4", "#74A089"]

In [2]:
def binomial_transition(n, prob):
    return np.random.binomial(n, prob)

def poisson_transition(n, rate):
    return np.random.poisson(np.nan_to_num(n * rate))


def check_state_space(x, pop=None):
    return np.clip(x, 0, pop)

def f(t, x, gamma, beta, delta, Nmean, N, A, D, M):
    C = x[0, :, :]
    S = np.clip(N - C,0,N)

    c = np.clip(np.nan_to_num(C/N), 0, 1)

    λ = beta * C / Nmean # force of infection

    # moving out and in colonized
    Cout  = binomial_transition(list(np.sum(M, axis=1, keepdims=True)), c)
    Cin   = M.T @ c

    a2c   = binomial_transition(list(A), gamma)                              # people admitted colonized.
    c2d   = binomial_transition(list(D), c)  # discharged colonized

    s2c  = poisson_transition(S, λ)     # new colonized
    c2s  = poisson_transition(C, delta) # decolonizations

    C    = C + a2c - c2d + s2c + c2s + Cin - Cout
    C    = np.clip(C, 0, N)

    return check_state_space(np.array([C, a2c, s2c]))

def g(t, x, N, rho, tests, model_settings):
    """ Observational model
        Args:
            t (int):      Time
            x (np.array): State space
            rho (float):  Observation probability
        Returns:
            y (np.array): Observed carriers ~ Binomial(C, rho)
    """

    m       = model_settings["m"]
    num_pop = model_settings["num_pop"]

    C       = x[0, :, :]
    tests   = tests * np.ones((num_pop, m))

    with np.errstate(divide='ignore', invalid='ignore'):
        observed_colonized = np.random.binomial(tests.astype('int'), rho * np.nan_to_num(C/N))

    return observed_colonized

def f0(N0, c0, model_settings):
    m       = model_settings["m"]
    num_pop = model_settings["num_pop"]

    N0 = np.expand_dims(N0, -1) * np.ones((num_pop, m))
    C0   = c0 * N0
    AC   = np.zeros((num_pop, m))
    newC = np.zeros((num_pop, m))

    return np.array([C0, AC, newC])

In [3]:
adht_ward_df = pd.read_csv(os.path.join(data_db_dir, "long_files_8_25_2021", "counts_ward.csv" ), parse_dates=['date'])

date_start = pd.to_datetime('2020-02-01')
date_end   = pd.to_datetime('2021-02-28')

dates_sim  = pd.date_range(date_start, date_end)

adht_ward_df   = adht_ward_df[adht_ward_df.date.isin(dates_sim)]
#selected_ward = ['Allen Hospital', 'Harkness Pavilion', 'Milstein Hospital', 'Mschony', 'Presbyterian Hospital']

A_df     = pd.pivot(adht_ward_df, index='ward', columns='date', values='num_admitted')
D_df     = pd.pivot(adht_ward_df, index='ward', columns='date', values='num_discharged')
H_df     = pd.pivot(adht_ward_df, index='ward', columns='date', values='num_hospitalized')
tests_df = pd.pivot(adht_ward_df, index='ward', columns='date', values='num_tested')

pop        = H_df.mean(axis=1)
num_pop    = len(pop)
ward_names = pop.index

ward_num          = len(ward_names)
ward_transfers_df = pd.read_csv(os.path.join(data_db_dir, "long_files_8_25_2021", "transfers_ward.csv"), parse_dates=['date'])
ward_transfers_df = ward_transfers_df[ward_transfers_df.date.isin(dates_sim)]

M_df = np.zeros((ward_num, ward_num, len(dates_sim)+1))

for i in range(ward_num):
    ward_from = ward_names[i]
    for j in range(ward_num):
        ward_to      = ward_names[j]
        transfers_ij = ward_transfers_df[(ward_transfers_df.ward_from==ward_from) & (ward_transfers_df.ward_to==ward_to)]

        if(transfers_ij.shape[0] > 0) :
            dates_ij                = transfers_ij.date.values
            dates_ind               = np.where(np.in1d(dates_ij, dates_sim))[0]
            transfered              = transfers_ij.num_transfered.values
            M_df[i, j, dates_ind-1] = transfered

In [4]:
# parameters
if_settings = {
   "Nif"                : 50,          # number of iterations of the IF
   "type_cooling"       : "geometric", # type of cooling schedule
   "shrinkage_factor"   : 0.9,         # shrinkage factor for the cooling schedule
   "inflation"          : 1.01,        # inflation factor for spreading the variance after the EAKF step
}

model_settings = {
    "param_name"  : ["ρ", "β"],   # importation and transmission rate
    "p"           : 2,              # number of parameters
    "k"           : num_pop,        # number of observations | We are just observing carriage
    "n"           : 3*num_pop,      # number of state variables / dimension of the state space
    "dt"          : 1,              # time step
    "T"           : len(dates_sim), # time to run
    "m"           : 300,           # number of ensembles
    "stochastic"  : True,           # is stochastic
    "num_pop"     : num_pop,
    "dates"       : dates_sim
    }

p = model_settings["p"]
m = model_settings["m"]
T = model_settings["T"]

delta = 1/120  # decolonization rate

A = A_df.to_numpy() # admissions       shape: [num_pop, T]
D = D_df.to_numpy() # discharges       shape: [num_pop, T]
H = H_df.to_numpy() # hospitalizations shape: [num_pop, T]
M = M_df            # transfers        shape: [num_pop, num_pop, T]

#tests = tests_df.to_numpy()
tests = np.zeros((num_pop, T))
tests = tests_df.to_numpy()


In [38]:
n = model_settings["n"]
k = model_settings["k"]
m = model_settings["m"]
T = model_settings["T"]

γ = 20/100
ρ = 0.5
β = 0.2

θsim          = np.array([[ρ, β]]).T * np.ones((2, m)) # true parameters

process_model = lambda t, x, θ: f(t, x,
                                    gamma = γ * np.ones(m),
                                    beta  = θ[1, :],
                                    delta = delta,
                                    Nmean = np.expand_dims(pop, -1),
                                    N     = H[:, [t]],
                                    A     = A[:, [t]],
                                    D     = D[:, [t]],
                                    M     = M[:, :, t])


def g(t, x, N, rho, num_tests, model_settings):
    """ Observational model
        Args:
            t (int):      Time
            x (np.array): State space
            rho (float):  Observation probability
        Returns:
            y (np.array): Observed carriers ~ Binomial(C, rho)
    """

    m       = model_settings["m"]
    num_pop = model_settings["num_pop"]
    C       = x[0, :, :]

    with np.errstate(divide='ignore', invalid='ignore'):
        observed_colonized = np.random.binomial(list(num_tests * np.ones((num_pop, m))), rho * np.clip(np.nan_to_num(C/N), 0, 1))

    return observed_colonized

observational_model = lambda t, x, θ: g(t, x,
                                        rho            = θ[0, :],
                                        N              = H[:, [t]],
                                        num_tests      = tests[:, [t]],
                                        model_settings = model_settings)

init_state = lambda θ: f0(  N0 = H[:, 0],
                            c0             = γ, # importation rate
                            model_settings = model_settings)


x0 = init_state(θsim)
if(x0.shape[0] != n or x0.shape[1] != m) :
    print('error in x0 dimensions')

x_sim = np.full((3, model_settings["num_pop"], m, T), np.nan)
y_sim = np.full((k, m, T), np.nan)

x_sim[:, :, :, 0] = x0
y_sim[:, :, 0]    = observational_model(0, x0, θsim)
for t in range(1, T-1):
    x_sim[:, :, :, t] = process_model(t, x_sim[:, :, :, t-1], θsim)
    y_sim[:, :, t]    = observational_model(t, x_sim[:, :, :, t], θsim)


error in x0 dimensions


  c = np.clip(np.nan_to_num(C/N), 0, 1)
  c = np.clip(np.nan_to_num(C/N), 0, 1)
