In [1]:
from scipy.stats import truncnorm
import pandas as pd
import numpy as np
import itertools
import datetime
import tqdm
import sys
import os

def flatten_list(list_array):
    """Concatenate the items of every iterable in ``list_array`` into one flat list.

    Only one level of nesting is removed; the order of the items is preserved.
    """
    return [item for inner in list_array for item in inner]

# Put the parent of the working directory first on the import path so that
# the shared `global_config` module can be imported below.
sys.path.insert(0, "../")
from global_config import config

# Base directories, read from the shared configuration object.
results_dir, data_dir, data_db_dir = (
    config.get_property(key) for key in ("results_dir", "data_dir", "data_db_dir")
)

# Paths derived from the configured base directories.
feb_hosp_records_path = os.path.join(data_db_dir, 'long_files_8_25_2021')
path_to_save          = os.path.join(results_dir, "real_testing", "community")


In [2]:
from utils.data_utils import load_movement_df, ward2size
from utils.plot_utils import *

# Data folder, resolved relative to the notebook's working directory.
path_to_data = os.path.join('..', '..', 'data')

# load scenarios for synthetic inferences
scenarios_df              = pd.read_csv(os.path.join(path_to_data, 'scenarios.csv'))
movement_df, ward2cluster = load_movement_df(path_to_data, True) # movement data

# Keep the per-ward table under its own name instead of rebinding `ward2size`
# to an intermediate DataFrame, and build the lookup column-wise: `iterrows()`
# creates a Series per row and may upcast the values it yields.
ward_size_df = ward2size(movement_df)

# NOTE: from here on `ward2size` is the {ward_id: num_patients} dict, not the
# imported helper function of the same name.
ward2size = dict(zip(ward_size_df["ward_id"], ward_size_df["num_patients"]))

  movement_df, ward2cluster = load_movement_df(path_to_data, True) # movement data


In [3]:
# Simulation window and the date range spanning it.
date_min         = pd.to_datetime("2020-02-01")
date_max         = pd.to_datetime("2021-02-28")
dates_simulation = pd.date_range(date_min, date_max)

# Prior specification for each inferred parameter.
# NOTE(review): presumably [lower, upper] bounds — confirm against IF2_eakf_ABM.
γ_prior = [0.01, 0.9]
β_prior = [0.001, 0.05]

param_prior_dict = {"γ": γ_prior, "β": β_prior}

# Agent based model settings.
abm_settings = {
    "num_patients":  len(movement_df.mrn_id.unique()),
    "num_wards":     len(movement_df.ward_id.unique()),
    "num_clusters":  len(set(ward2cluster.values())),
    "dates":         dates_simulation,
    "num_ensembles": 300,
}

# Iterated filtering settings.
if2_settings = {
    "num_params":       len(param_prior_dict),
    "num_observations": len(set(ward2cluster.values())),  # one observation per distinct cluster
    "lambda_inf":       1.01,         # Inflation for the EAKF.
    "num_iters_mif":    20,           # Number of iterations.
    "alpha_mif":        0.8,          # Variance shrinking factor.
    "type_cooling":     "geometric",  # Type of cooling.
    "num_ensembles":    300,
    "oev_variance":     0.2,
}


In [4]:
from utils.iterated_filtering_utils import IF2_eakf_ABM
from utils.model_utils import model_inference
from utils.infer_utils import *

def create_inference_dataframe(positive_sample, negative_sample):
    """Build the weekly positive and negative observation tables.

    Each sample is read as ``sample[:, idx_cluster]`` for every cluster in
    ``abm_settings["num_clusters"]`` and aligned row-wise with
    ``abm_settings["dates"]``. The daily values are then summed with a
    ``"W-Sun"`` resample, and one ``oev_{c}`` column per cluster is added by
    calling ``compute_oev`` with ``if2_settings["oev_variance"]``.

    Note that the negative table also stores its values under ``pos_{c}``
    column names.

    Returns:
        tuple: ``(obs_weekly, neg_weekly)`` data frames indexed by week.
    """
    cluster_ids = range(abm_settings["num_clusters"])
    count_cols  = [f"pos_{c}" for c in cluster_ids]

    def _weekly_totals(sample):
        # Daily table (date + one column per cluster) summed into weekly bins.
        daily         = pd.DataFrame(columns=["date"] + count_cols)
        daily["date"] = abm_settings["dates"]
        for c, col in zip(cluster_ids, count_cols):
            daily[col] = sample[:, c]
        return daily.set_index("date").resample("W-Sun").sum()

    obs_weekly = _weekly_totals(positive_sample)
    neg_weekly = _weekly_totals(negative_sample)

    # Observation error variance columns, computed cluster by cluster.
    oev_variance = if2_settings["oev_variance"]
    for c, col in zip(cluster_ids, count_cols):
        obs_weekly[f"oev_{c}"] = compute_oev(obs_weekly[col], var_obs=oev_variance)
        neg_weekly[f"oev_{c}"] = compute_oev(neg_weekly[col], var_obs=oev_variance)

    return obs_weekly, neg_weekly

In [5]:
RUN_ID = 2   # Suffix of the output file: inference_{RUN_ID}.npz
ρ      = 6   # Used as a percentage in the folder name and as ρ / 100 in the model.

# Run the iterated-filtering inference once per scenario and store the result
# next to that scenario's simulated observations.
# NOTE(review): the recorded output shows each scenario taking hours, and an
# existing inference_{RUN_ID}.npz is overwritten when this cell is re-run.
for idx_row, row in scenarios_df.iterrows():
    θ      = {}
    θ['γ'] = row['γ']
    θ['β'] = row['β']
    θ['ρ'] = ρ / 100

    # Scenario folder: holds the simulated observations and receives the output.
    path_to_scenario    = os.path.join('..', '..', 'results', 'synthetic_inferences', f'ρ_{ρ}%', row.name_scenario)
    path_save_inference = os.path.join(path_to_scenario, f'inference_{RUN_ID}.npz')

    # Read the arrays inside a context manager so the .npz file handle is
    # closed before the long-running inference starts.
    name_sims_save = "simulation_infer.npz"
    with np.load(os.path.join(path_to_scenario, name_sims_save)) as sim_samples:
        idx_use          = sim_samples['idx_use']
        cluster_positive = sim_samples['cluster_positive'][:, :, idx_use]
        cluster_negative = sim_samples['cluster_negative'][:, :, idx_use]

    obs_w_chunk_df, neg_w_chunk_df = create_inference_dataframe(cluster_positive, cluster_negative)

    def model_use(p_state, γ_m, β_m, α_m, movement):
        """Call `model_inference` with the ward lookups and this scenario's ρ filled in."""
        return model_inference(p_state, γ_m, β_m, α_m, movement, ward2size, ward2cluster, θ['ρ'])

    (obs_post_all_pos,
     obs_post_all_neg,
     para_post_all,
     param_iter,
     param_mean_iter) = IF2_eakf_ABM(model_use, obs_w_chunk_df, neg_w_chunk_df, movement_df,
                                     param_prior_dict, if2_settings, abm_settings, perturb_time=True)

    np.savez_compressed(path_save_inference,
                        obs_posterior_pos  = obs_post_all_pos,
                        obs_posterior_neg  = obs_post_all_neg,
                        param_posterior    = para_post_all,
                        param_post_iter    = param_iter,
                        theta              = param_mean_iter)


Running MIF  



100%|██████████| 20/20 [3:45:52<00:00, 677.63s/it]  


Running MIF  



100%|██████████| 20/20 [3:46:18<00:00, 678.92s/it]  


Running MIF  



100%|██████████| 20/20 [5:50:15<00:00, 1050.79s/it]  


Running MIF  



100%|██████████| 20/20 [3:27:05<00:00, 621.27s/it]  


Running MIF  



100%|██████████| 20/20 [3:17:38<00:00, 592.95s/it]  


Running MIF  



100%|██████████| 20/20 [3:20:39<00:00, 601.98s/it]  


Running MIF  



100%|██████████| 20/20 [14:21:44<00:00, 2585.22s/it]  


Running MIF  



100%|██████████| 20/20 [20:36:55<00:00, 3710.75s/it]   


Running MIF  



100%|██████████| 20/20 [19:30:41<00:00, 3512.07s/it]   


Running MIF  



100%|██████████| 20/20 [48:59:22<00:00, 8818.11s/it]    
