In [11]:
from scipy.stats import truncnorm
import pandas as pd
import numpy as onp
import itertools
import datetime
import tqdm
import sys
import os

import matplotlib.pyplot as plt

def flatten_list(list_array):
    return list(itertools.chain(*list_array))

sys.path.insert(0,"../")
from global_config import config

results_dir           = config.get_property('results_dir')
data_dir              = config.get_property('data_dir')
paper_dir             = config.get_property('paper_dir')
data_db_dir           = config.get_property('data_db_dir')
feb_hosp_records_path = os.path.join(data_db_dir, 'long_files_8_25_2021')
path_to_save          = os.path.join(results_dir, "real_testing", "community")


COLOR_LIST1 = ["#F8AFA8", "#FDDDA0", "#F5CDB4", "#74A089"]

In [12]:
dates_simulation = pd.date_range(start="2020-02-01", end="2021-02-28", freq="D")

movement_df               = pd.read_csv(os.path.join(data_db_dir, "long_files_8_25_2021", 'patient_movement_2022-Nov.csv'), parse_dates=['date']).drop_duplicates(subset=["date", "mrn"], keep="first")
movement_df["ward_total"] = movement_df.apply(lambda x: x["ward"]+"-"+x["building"]+"-"+x["place"], axis=1)
movement_df               = movement_df[movement_df["date"].isin(dates_simulation)]

mrd2id  = {mrn: id for id, mrn in enumerate(movement_df.mrn.unique())}
ward2id = {ward_name: id for id, ward_name in enumerate(movement_df.ward_total.unique())}

movement_df["mrn_id"]        = movement_df.mrn.map(mrd2id)
movement_df["ward_id"]       = movement_df.ward_total.map(ward2id)

ward_size_df                 = movement_df.reset_index()
ward_size_df["ward_id"]      = ward_size_df["ward_total"].apply(lambda x: ward2id[x])
ward_size_df["num_patients"] = 1
ward_size_df                 = ward_size_df.groupby(["date", "ward", "ward_id"]).sum()[["num_patients"]].reset_index().drop(columns=["date"])
ward_size_df                 = ward_size_df.groupby(["ward", "ward_id"]).mean().reset_index().sort_values(by="num_patients")
ward2size                    = {r.ward_id: r.num_patients for idx_r, r in ward_size_df.iterrows()}

id2ward                      = dict((v, k) for k, v in ward2id.items())

###-###-###-###-###-###-###-###-###-###-###-###
cluster_diag_df              = pd.read_csv(os.path.join("..", "data", "infomap_nondiag.csv"), sep=" ").rename(columns={"node_id": "ward_id"})
cluster_diag_df["ward_name"] = cluster_diag_df["ward_id"].map(id2ward)
cluster_diag_df["cluster"]   = cluster_diag_df.apply(lambda x: int(str(x.path).split(":")[0]), axis=1)
cluster_diag_df              = cluster_diag_df[["cluster", "ward_id", "ward_name"]].sort_values(by="cluster")
cluster_diag_df['num_wards'] = cluster_diag_df["cluster"].apply(lambda x: onp.sum(cluster_diag_df["cluster"] == x))

cluster_diag_df["cluster"][cluster_diag_df.cluster>=6] = 6
cluster_diag_df["cluster"] = cluster_diag_df["cluster"].map(lambda x: int(x-1))

cluster_diag_df['num_wards']                           = cluster_diag_df["cluster"].apply(lambda x: onp.sum(cluster_diag_df["cluster"] == x))
###-###-###-###-###-###-###-###-###-###-###-###

wardid2cluster         = dict(zip(cluster_diag_df["ward_id"], cluster_diag_df["cluster"]))
movement_df["cluster"] = movement_df["ward_id"].map( wardid2cluster )
movement_df["cluster"] = movement_df["cluster"].fillna(cluster_diag_df.cluster.max())
movement_df["cluster"] = movement_df["cluster"].map(lambda x: int(x))


  ward_size_df                 = ward_size_df.groupby(["date", "ward", "ward_id"]).sum()[["num_patients"]].reset_index().drop(columns=["date"])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cluster_diag_df["cluster"][cluster_diag_df.cluster>=6] = 6


In [13]:
class Patient:
    susceptible = 0
    colonized   = 1

class Observed:
    no  = 0
    yes = 1

In [14]:
def amr_abm(t, agents_state, gamma, beta, alpha, movement, ward2size, Np):
    """ Agent based model tracking colonized and susceptible patients with pre-defined movement patterns.

    Args:
        agents_state : agent state. {0: Patient.susceptible, 1: Patient.colonized}  Size: (n_patients)
        movement     : pd.Dataframe with patient locations and culture information.
        parameters   : dictionary of parameters, contains importation rate (gamma), nosocomial transmission rate (beta),
                        effective sensitivity (ro), and decolonization rate (alpha)
    """
    agents_state = onp.array(agents_state)
    _, m = agents_state.shape

    γ   = gamma            # importation rate.
    β   = beta             # nosocomial transmission rate.
    α   = alpha            # decolonization rate.
    Np  = Np               # number of patients

    # Decolonize patients according to. P(C2S) = α
    #agents_state.at[:].set(np.maximum(agents_state - agents_state * (random.uniform(key=key, shape=(Np, )) < α), 0))
    #agents_state = agents_state - agents_state * (onp.random.uniform(size=(Np, )) < α)
    p_update = agents_state.copy()
    p_update = Patient.susceptible * (agents_state * onp.random.random(size=(Np, m)) <= α)

    # import patients.
    new_patients = movement[movement["first_day"]==1]["mrn_id"].values
    if new_patients.shape[0] > 0:
        # P(S2C) = \gamma - Probability of colonization given importation.
        p_update[new_patients, :] = Patient.colonized * (onp.random.random(size=(new_patients.shape[0], m)) <= γ)

    # Compute force of infection for each ward.
    for i, ward_id in enumerate(movement["ward_id"].unique()):
        patients_ward = movement[movement["ward_id"]==ward_id]["mrn_id"].values

        # λ_i = β  * C / N  - Force of infection for ward i.
        λ_i = β * onp.sum(p_update[patients_ward, :]==Patient.colonized) / ward2size[ward_id]

        # P(C2S)_i = λ_i, we add the state but if already colonized the state would be 2 so we clip it to 1.
        p_update[patients_ward, :] = p_update[patients_ward, :] + Patient.colonized * (onp.random.random(size=(patients_ward.shape[0], m)) <= λ_i)
    p_update = onp.clip(p_update, 0, 1)
    return p_update

def observe_cluster(t, patients_state, movement, rho, Nc):
    _, m           = patients_state.shape

    ρ              = rho # effective sensitivity.
    Nc             = Nc  # number of clusters

    cluster_positive  = onp.zeros((Nc, m))
    p_test            = Observed.yes * (onp.random.random(size=(patients_state.shape[0], m)) <= patients_state * ρ)

    for i, cluster in enumerate(movement["cluster"].unique()):
        patients_test_ward            = movement.query(f"cluster=={cluster} and test==True")["mrn_id"].values
        cluster_positive[cluster,  :] = onp.sum(p_test[patients_test_ward, :]    == Observed.yes, axis=0)
    return cluster_positive

def f0(parameters, model_settings):
    """ Initial state of the model.
    """
    Np = parameters["Np"]     # number of patients.
    m  = model_settings["m"]  # number of ensembles.

    patient_state = onp.zeros((Np, m))
    return patient_state

In [15]:
def simulate_scenario(gamma, beta, rho, alpha, data_movement, model_settings):

    dates_simulation = pd.to_datetime(onp.sort(data_movement.date.unique()))

    θ = onp.array([[beta], [rho]]) * onp.ones((model_settings["p"], model_settings["m"]))
    x = onp.zeros((model_settings["n"], model_settings["m"]))

    f0_if  = lambda θ:       amr_abm(0, x, gamma, θ[0, :], alpha, movement_df[movement_df["date"]==dates_simulation[0]], ward2size, model_settings["n"])
    f_if   = lambda t, x, θ: amr_abm(t, x, gamma, θ[0, :], alpha, movement_df[movement_df["date"]==dates_simulation[t]], ward2size, model_settings["n"])
    g_if   = lambda t, x, θ: observe_cluster(t, x, movement_df[movement_df["date"]==dates_simulation[t]], θ[1, :], Nc=model_settings["k"])

    # init state space
    x = f0_if(θ)

    observations          = onp.full((len(dates_simulation), model_settings["k"], model_settings["m"]), onp.nan)
    observations[0, :, :] = g_if(0, x, θ)

    for t, date in tqdm.tqdm(enumerate(dates_simulation[1:])):
        x                       = f_if(t, x, θ)
        observations[t+1, :, :] = g_if(t, x, θ)

    return observations

In [16]:
sys.path.insert(0, "./pompjax/pompjax")

from pyro.contrib.forecast import eval_crps
from eval import calibration

def compute_evals(samples, obs, beta, rho,  name_var="beta"):
    """_summary_

    Args:
        samples (_type_): num_ensembles x num_times
        obs (_type_): time series observation

    Returns:
        _type_: _description_
    """

    cal_df = calibration.calibration(onp.expand_dims(samples.T, 0), onp.expand_dims(obs, 0), observation_index=0)
    sc     = onp.mean(onp.abs(cal_df.quantiles.values-cal_df.proportion_inside.values))

    df_response              = pd.DataFrame(columns=['crps', 'calibration_score', name_var, "rho"])
    df_response['crps']      = [eval_crps(samples, obs)]
    df_response["calibration_score"] = sc
    df_response[name_var]    = [beta]
    df_response['rho']       = [rho]

    return df_response


ModuleNotFoundError: No module named 'eval'

In [None]:
import torch

def grid_search(amro, prevalence, movement_df, model_settings):
    alpha = 1/120
    gamma = prevalence


    dates_resamp    = pd.date_range(start="2020-02-01", end="2021-03-01", freq="W-Sun")

    rho_search      = onp.arange(0.05, 0.2, 0.02)
    beta_search     = onp.arange(0.005, 0.04+0.001, 0.005)

    test_df         = movement_df.copy(); test_df = test_df[test_df["test"]==1]
    amro_df         = test_df.copy()
    amro_df["keep"] = amro_df["organism_name"].apply(lambda x: amro in str(x))
    amro_df         = amro_df[amro_df["keep"]==True]
    amro_df         = amro_df.groupby(["date", "cluster"]).sum()[["test"]].unstack([1]).fillna(0).resample("W-Sun").sum()
    amro_df         = amro_df.xs("test", axis=1, drop_level=True)
    amro_df         = amro_df.sum(axis=1)
    amro_df         = amro_df.reindex(dates_resamp, fill_value=0)

    # sum across clusters
    obs_amro  = amro_df.values

    metric_df = pd.DataFrame()
    for rho in rho_search:
        for beta in beta_search:
            observations       = simulate_scenario(gamma, beta, rho, alpha, movement_df, model_settings)
            observations       = onp.sum(observations, axis=1)
            sim_df             = pd.DataFrame(columns=["date","ens_id", "values", "scenario"])
            sim_df["values"]   = observations.flatten()
            sim_df["date"]     = flatten_list([ [date]*model_settings["m"]  for date in  list(model_settings["dates"])])
            sim_df["ens_id"]   = list(range(model_settings["m"] )) * len(model_settings["dates"])
            sim_df["rho"]      = rho

            samples_t  = sim_df.set_index(["date", "ens_id", "rho"]).unstack([1, 2]).resample("W-Sun").sum().stack().stack().reset_index()
            samples_t  = pd.pivot(data=samples_t, index="date", columns="ens_id", values="values").to_numpy().T
            samples_t  = torch.tensor(samples_t);  obs_t  = torch.tensor(list(obs_amro))

            df_metrics = compute_evals(samples_t, obs_t, beta, rho)
            metric_df  = pd.concat([metric_df, df_metrics])

    return metric_df

In [None]:
def amro2cute(amro):
    if amro == 'ESCHERICHIA COLI':
        return "e_coli"
    elif amro == 'KLEBSIELLA PNEUMONIAE':
        return "k_pneumoniae"
    elif amro=="PSEUDOMONAS AERUGINOSA":
        return "p_aeruginosa"
    elif amro=="METHICILLIN-SUSCEPTIBLE STAPHYLOCOCCUS AUREUS":
        return "mssa"
    elif amro=="METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS":
        return "mrsa"
    elif amro=="STAPHYLOCOCCUS EPIDERMIDIS":
        return "s_epidermidis"
    elif amro=="ENTEROCOCCUS FAECALIS":
        return "e_faecalis"
    elif amro=="ENTEROCOCCUS FAECIUM":
        return "e_faecium"

In [None]:
amro_prevalence_df = pd.read_csv("data/amro_prevalence.csv")

model_settings          = {}
model_settings["m"]     = 100
model_settings["p"]     = 2
model_settings["n"]     = movement_df.mrn_id.unique().shape[0]
model_settings["k"]     = movement_df.cluster.unique().shape[0]
model_settings["dates"] = pd.date_range(start="2020-02-01", end="2021-02-28", freq="D")

amro_search = ['ESCHERICHIA COLI', 'KLEBSIELLA PNEUMONIAE', 'PSEUDOMONAS AERUGINOSA', 'METHICILLIN-SUSCEPTIBLE STAPHYLOCOCCUS AUREUS',
                'METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS', 'STAPHYLOCOCCUS EPIDERMIDIS', 'ENTEROCOCCUS FAECALIS', 'ENTEROCOCCUS FAECIUM']


In [None]:
for amro in amro_search:
    if os.path.isfile( os.path.join("..", "preliminary_results", f"crps_{amro2cute(amro)}.csv") ):
        continue
    gamma        = amro_prevalence_df[amro_prevalence_df.amro==amro]["prevalence_mean1"].values[0]/100
    crps_amro_df = grid_search( amro, gamma, movement_df, model_settings )
    crps_amro_df.to_csv( os.path.join("..", "preliminary_results", f"crps_{amro2cute(amro)}.csv") )
