In [2]:
from scipy.stats import truncnorm
import pandas as pd
import numpy as np
import itertools
import datetime
import tqdm
import sys
import os

def flatten_list(list_array):
    """Flatten one level of nesting.

    Parameters
    ----------
    list_array : iterable of iterables
        Each element is iterated in order and its items are appended to the result.

    Returns
    -------
    list
        The items of every inner iterable, concatenated in order.
    """
    flat = []
    for chunk in list_array:
        flat.extend(chunk)
    return flat

# Put the parent directory first on the import path before importing the
# project configuration (presumably that is where global_config lives — confirm).
sys.path.insert(0,"../")
from global_config import config

# Output locations, looked up by name in the project configuration.
results_dir           = config.get_property('results_dir')
results2_dir           = config.get_property('results2_dir')

# Input locations, looked up by name in the project configuration.
data_dir              = config.get_property('data_dir')
paper_dir             = config.get_property('paper_dir')
data_db_dir           = config.get_property('data_db_dir')
# Folder 'long_files_8_25_2021' under data_db_dir; the ward-level CSV files
# read further down in this notebook sit in the same folder.
feb_hosp_records_path = os.path.join(data_db_dir, 'long_files_8_25_2021')
# <results_dir>/real_testing/community
path_to_save          = os.path.join(results_dir, "real_testing", "community")

# Four hex colour codes (not used in the portion of the notebook shown here).
COLOR_LIST1           = ["#F8AFA8", "#FDDDA0", "#F5CDB4", "#74A089"]

In [3]:
def create_population_data(path_to_file, date_start=pd.to_datetime("2020-02-01"), date_end=pd.to_datetime("2021-02-28")):
    """Load the long-format ward counts and reshape them into ward-by-date tables.

    Parameters
    ----------
    path_to_file : str or path-like
        CSV with the columns ``date``, ``ward``, ``num_admitted``,
        ``num_discharged``, ``num_hospitalized`` and ``num_tested``.
    date_start, date_end : datetime-like
        Inclusive bounds of the daily date range; rows whose ``date`` is not
        one of those days are dropped.

    Returns
    -------
    tuple
        ``(A_df, D_df, H_df, tests_df, Hmean_df)``: the first four are
        DataFrames indexed by ward with one column per date (admitted,
        discharged, hospitalized, tested); ``Hmean_df`` is the row-wise mean
        of ``H_df``, i.e. one value per ward.
    """
    simulation_days = pd.date_range(start=date_start, end=date_end, freq="D")

    counts = pd.read_csv(path_to_file, parse_dates=['date'])
    counts = counts.loc[counts["date"].isin(simulation_days)]

    def ward_by_date(column):
        # One row per ward, one column per date, cells taken from `column`.
        return pd.pivot(counts, index='ward', columns='date', values=column)

    admitted     = ward_by_date('num_admitted')
    discharged   = ward_by_date('num_discharged')
    hospitalized = ward_by_date('num_hospitalized')
    tested       = ward_by_date('num_tested')

    return admitted, discharged, hospitalized, tested, hospitalized.mean(axis=1)

def create_time_transfers(path_to_file, num_wards, ward_names, date_start=pd.to_datetime("2020-02-01"), date_end=pd.to_datetime("2021-02-28")):
    """Build the time-resolved ward-to-ward transfer array.

    Parameters
    ----------
    path_to_file : str or path-like
        CSV with the columns ``date``, ``ward_from``, ``ward_to`` and
        ``num_transfered``.
    num_wards : int
        Number of wards; sets the first two dimensions of the result.
    ward_names : sequence
        Ward labels; ``ward_names[k]`` is the ward stored at index ``k``.
    date_start, date_end : datetime-like
        Inclusive bounds of the daily simulation range; rows dated outside it
        are ignored.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_wards, num_wards, T + 1)`` where ``T`` is the
        number of simulation days. ``M[i, j, t]`` holds ``num_transfered``
        from ``ward_names[i]`` to ``ward_names[j]``; entries with no record
        stay at zero.

    Notes
    -----
    A record dated on simulation day ``k`` (0-based) is written to slot
    ``k - 1``, so day 0 wraps around to the last (extra) slot. This offset is
    kept from the previous implementation.
    NOTE(review): confirm the ``-1`` shift is intended.
    """
    dates_simulation = pd.date_range(start=date_start, end=date_end, freq="D")
    transfers_df     = pd.read_csv(path_to_file, parse_dates=['date'])
    transfers_df     = transfers_df[transfers_df.date.isin(dates_simulation)]
    M_df             = np.zeros((num_wards, num_wards, len(dates_simulation)+1))

    for i in range(num_wards):
        # Filter on the origin ward once per row of the matrix.
        transfers_from = transfers_df[transfers_df.ward_from == ward_names[i]]
        if transfers_from.shape[0] == 0:
            continue

        for j in range(num_wards):
            transfers_ij = transfers_from[transfers_from.ward_to == ward_names[j]]
            if transfers_ij.shape[0] == 0:
                continue

            # Position of every record's date on the simulation timeline.
            # The previous np.in1d(dates_ij, dates_simulation) returned the
            # row positions inside the filtered subset instead, so sparse or
            # unsorted records were written to the wrong days.
            dates_ind  = dates_simulation.get_indexer(pd.DatetimeIndex(transfers_ij.date))
            transfered = transfers_ij.num_transfered.values

            # get_indexer marks unmatched dates with -1; the isin filter above
            # should already exclude them, this only guards against it.
            on_timeline = dates_ind >= 0
            M_df[i, j, dates_ind[on_timeline]-1] = transfered[on_timeline]

    return M_df


In [4]:
# Ward-level input files, both in the hospital-records folder defined with the
# other paths at the top of the notebook.
path_to_ward_counts = os.path.join(feb_hosp_records_path, "counts_ward.csv")
path_to_ward_transf = os.path.join(feb_hosp_records_path, "transfers_ward.csv")

# Ward-by-date tables: admissions, discharges, census, tests, and the mean census per ward.
A_df, D_df, H_df, tests_df, Hmean_df = create_population_data(path_to_ward_counts)

# Ward order follows the index of the pivoted tables; the transfer array uses the same order.
ward_names = list(Hmean_df.index)
num_wards  = len(Hmean_df)
M_df       = create_time_transfers(path_to_ward_transf, num_wards=num_wards, ward_names=ward_names)

# We want synthetic scenarios that, overall, reproduce the synthetic observations, so we use what was loaded above and sample the parameter space at random to create those scenarios.

In [60]:
from scipy.interpolate import UnivariateSpline

def return_score_cutoff(score, cut_off_prob=0.05):
    """Return the score value at which the smoothed empirical CDF is closest to ``cut_off_prob``.

    The scores are binned into a 100-bin density histogram, the bin densities
    are accumulated and normalised so the last value is 1, and a
    ``UnivariateSpline`` (``s=0.001``) is fitted to that curve against the
    upper bin edges. The spline is evaluated on 1000 evenly spaced points
    between the smallest and largest upper edge, and the point whose spline
    value is nearest to ``cut_off_prob`` is returned.

    Parameters
    ----------
    score : array-like
        Score values.
    cut_off_prob : float, default 0.05
        Target cumulative fraction.

    Returns
    -------
    numpy.floating
        The grid point selected as the cut-off.
    """
    density, bin_edges = np.histogram(score, bins=100, density=True)

    cumulative  = np.cumsum(density)
    cumulative  = cumulative/cumulative[-1]
    upper_edges = bin_edges[1:]

    cdf_spline = UnivariateSpline(upper_edges, cumulative, s=0.001)
    grid       = np.linspace(np.min(upper_edges), np.max(upper_edges), 1000)
    distance   = np.abs(cdf_spline(grid) * 100 - cut_off_prob*100)

    return grid[np.argmin(distance)]

In [61]:
# Fraction of the grid search to keep, passed to return_score_cutoff below.
cut_off_prob = 5/100

amro         = "ESCHERICHIA COLI"
# NOTE(review): amro2cute is not defined in the visible part of this notebook;
# it has to be in the kernel namespace before this cell runs.
gs_df        = pd.read_csv( os.path.join(results2_dir, "grid_search", "metapopulation", f"{amro2cute(amro)}.csv") ).drop(columns=["Unnamed: 0"])

# Use the variable defined above rather than repeating the literal, so that
# changing cut_off_prob actually changes the cut-off.
sc_cutoff    = return_score_cutoff(gs_df.crps, cut_off_prob=cut_off_prob)

# Keep only the grid-search rows whose crps is at or below the cut-off.
gs_df        = gs_df[gs_df.crps <= sc_cutoff].reset_index(drop=True)

# Draw ten of the remaining rows at random as scenarios.
# NOTE(review): the draw is unseeded, so a re-run selects different scenarios,
# and it raises if fewer than ten rows pass the cut-off.
scenarios_df = gs_df.sample(n=10)[["rho", "beta", "crps", "calibration_score"]]



In [None]:
from pompjax.diagnostic_plots import convergence_plot
from pompjax.utils import create_df_response
from pompjax import ifeakf

def run_inference(inference_task, f, g, f0, if_settings, model_settings):
    """Simulate one synthetic scenario, fit it with ``ifeakf`` and save the results.

    Parameters
    ----------
    inference_task : dict
        Must provide ``id_run``, ``scenario_name``, ``scenario_idx``,
        ``scenarios_num`` and ``obs_df``.
    f, g, f0 : callable
        Passed to ``simulate_meta_population_model`` and to ``ifeakf`` as the
        process model, the observational model and the initial state guess.
    if_settings : dict
        Must provide ``parameters_range``, ``state_space_range``,
        ``σ_perturb`` and ``Nif``; also forwarded whole to ``ifeakf``.
    model_settings : dict
        Must provide ``param_truth`` (rho first, beta second), ``num_pop`` and
        ``dates``; also forwarded whole to the simulator and to ``ifeakf``.

    Side effects
    ------------
    Prints a progress line, creates ``<results_dir>/<scenario_name>/`` and
    writes ``<id_run>posterior.npz`` (id zero-padded to three digits) there.
    For ``id_run == 0`` a convergence plot is also produced.

    Notes
    -----
    ``simulate_meta_population_model``, ``idx_dates_infer``, ``dates_infer``,
    ``results_dir`` and ``plt`` are read from the surrounding notebook
    namespace; the first three and ``plt`` are not defined in the portion of
    the notebook visible here.
    """
    id_run            = inference_task['id_run']
    scenario_name     = inference_task['scenario_name']
    scenario_idx      = inference_task['scenario_idx']
    scenarios_num     = inference_task['scenarios_num']
    # NOTE(review): this value is never used; obs_df is rebuilt from the
    # simulation further down. The lookup is kept so a task without the key
    # still fails the same way as before.
    obs_df            = inference_task['obs_df']

    parameters_range  = if_settings["parameters_range"]
    state_space_range = if_settings["state_space_range"]
    σ_perturb         = if_settings["σ_perturb"]

    ρ_truth        = model_settings["param_truth"][0]
    β_truth        = model_settings["param_truth"][1]
    num_pop        = model_settings["num_pop"]
    dates          = model_settings["dates"]

    print(f"Running run {id_run+1} of scenario {scenario_idx} out of {scenarios_num}, rho={ρ_truth}, beta={β_truth}")

    # Repeat the true parameters across 300 columns: row 0 is rho, row 1 is beta.
    # The column vector is required for broadcasting; the previous
    # `np.array([ρ, β]).T` was 1-D with shape (2,), which cannot broadcast
    # against (2, 300) and raised a ValueError.
    # 300 is presumably the ensemble size the simulator expects — TODO confirm.
    θ = np.array([ρ_truth, β_truth]).reshape(2, 1) * np.ones((2, 300))
    xsim, ysim    = simulate_meta_population_model(f,
                                                    g,
                                                    f0,
                                                    θ,
                                                    model_settings)

    # Pick one simulated trajectory at random (unseeded) to act as the observations.
    idx_infer  =  np.random.randint(ysim.shape[1])
    obs_infer  = ysim[:, idx_infer, idx_dates_infer]

    # One 'y<k>' column and one 'oev<k>' column per population, k starting at 1.
    obs_df = pd.DataFrame(index=dates_infer)
    for i in range(0, num_pop) :
        obs_df['y'+str(i+1)]   = obs_infer[i,:]
        obs_df['oev'+str(i+1)] = 1 +(0.2 * obs_df['y'+str(i+1)].values)**2
    # Aggregate to weekly sums on bins ending on Sunday, then relabel the last bin
    # with the last simulation date.
    # NOTE(review): this writes into the index's underlying array in place —
    # confirm it still works on the pandas version in use.
    obs_df                  = obs_df.resample("W-Sun").sum()
    obs_df.index.values[-1] = dates[-1]

    θmle, θpost    = ifeakf(process_model             = f,
                            observational_model       = g,
                            state_space_initial_guess = f0,
                            observations_df           = obs_df,
                            parameters_range          = parameters_range,
                            state_space_range         = state_space_range,
                            model_settings            = model_settings,
                            if_settings               = if_settings,
                            perturbation              = σ_perturb)

    path_to_save = os.path.join(results_dir, scenario_name)
    os.makedirs(path_to_save, exist_ok=True)
    np.savez_compressed(os.path.join(path_to_save, f"{str(id_run).zfill(3)}posterior.npz"),
                                    mle           = θmle,
                                    posterior     = θpost,
                                    state_space   = xsim,
                                    observations  = ysim,
                                    idx_infer     = idx_infer)

    # save convergence plot
    p_truth = model_settings["param_truth"]
    β_df    = create_df_response(θpost[1, :, :, :].mean(-2).T, if_settings["Nif"])
    rho_df  = create_df_response(θpost[0, :, :, :].mean(-2).T, if_settings["Nif"])

    p_df   = [rho_df, β_df]
    p_name = [r"$\rho$", r"$\beta$"]

    # Only the first run of a scenario gets a convergence figure.
    if(id_run==0) :
            fig, ax = plt.subplots(2, 1, figsize=(10.5, 9.2), sharex=True)
            convergence_plot(θmle, p_df, parameters_range, param_truth=p_truth, param_label=p_name, ax=ax, fig=fig,
                            path_to_save=os.path.join(path_to_save, f"{str(id_run).zfill(3)}convergence.png"))