In [1]:
from scipy.stats import truncnorm
import pandas as pd
import numpy as np
import itertools
import datetime
import tqdm
import sys
import os

sys.path.insert(0, "../")

from matplotlib import dates as mdates
from utils_local import plot_utils
import matplotlib.pyplot as plt


def flatten_list(list_array):
    """Flatten one level of nesting.

    Parameters
    ----------
    list_array : iterable of iterables
        The outer container whose members are themselves iterable.

    Returns
    -------
    list
        Every element of every inner iterable, in encounter order.
    """
    return [element for inner in list_array for element in inner]


from global_config import config

# Project directories, read from the shared configuration object in a fixed order.
results_dir, results2_dir, data_dir, paper_dir, data_db_dir = (
    config.get_property(property_name)
    for property_name in (
        'results_dir',
        'results2_dir',
        'data_dir',
        'paper_dir',
        'data_db_dir',
    )
)

# Folder (inside data_db_dir) holding the long-format hospital record files.
feb_hosp_records_path = os.path.join(data_db_dir, 'long_files_8_25_2021')

# Output folder for this notebook's results.
path_to_save = os.path.join(results_dir, "real_testing", "community")

# Four-colour palette (hex codes) for figures.
COLOR_LIST1 = [
    "#F8AFA8",
    "#FDDDA0",
    "#F5CDB4",
    "#74A089",
]


In [15]:
# Simulation window: one timestamp per calendar day, both end points included.
dates_simulation = pd.date_range(start="2020-02-01", end="2021-02-28", freq="D")

# Load the patient movement table and keep only the first record per (date, mrn).
movement_df = pd.read_csv(
    os.path.join(data_db_dir, "long_files_8_25_2021", "patient_movement_2022-Nov.csv"),
    parse_dates=["date"],
).drop_duplicates(subset=["date", "mrn"], keep="first")

# Composite ward label "<ward>-<building>-<place>", built column-wise instead of
# with a row-by-row apply.
# NOTE(review): assumes the three columns hold strings with no missing values — confirm.
movement_df["ward_total"] = (
    movement_df["ward"] + "-" + movement_df["building"] + "-" + movement_df["place"]
)

# Keep only rows whose date is in the simulation window. The explicit .copy()
# makes the result an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning.
movement_df = movement_df[movement_df["date"].isin(dates_simulation)].copy()

# Integer ids: patients in order of first appearance, wards in sorted label order.
# (The name `mrd2id` is kept as-is because other cells may reference it.)
mrd2id  = {mrn: mrn_idx for mrn_idx, mrn in enumerate(movement_df["mrn"].unique())}
ward2id = {ward_name: ward_idx for ward_idx, ward_name in enumerate(np.sort(movement_df["ward_total"].unique()))}

movement_df["mrn_id"]  = movement_df["mrn"].map(mrd2id)
movement_df["ward_id"] = movement_df["ward_total"].map(ward2id)

# Number of movement rows per (date, ward, ward_id).
# Counting rows with .size() touches no other column, whereas summing every
# column and then selecting one also aggregates the non-numeric columns.
daily_ward_counts = (
    movement_df
    .groupby(["date", "ward", "ward_id"])
    .size()
    .rename("num_patients")
    .reset_index()
)

# Average of those daily counts per ward, taken over the dates on which the ward
# has at least one row; sorted from smallest to largest.
ward_size_df = (
    daily_ward_counts
    .groupby(["ward", "ward_id"], as_index=False)["num_patients"]
    .mean()
    .sort_values(by="num_patients")
)

# ward_id -> average daily count.
ward2size = dict(zip(ward_size_df["ward_id"], ward_size_df["num_patients"]))


# One row per ward_id with its building and place, read from the original
# columns. Splitting the "<ward>-<building>-<place>" label on "-" returns the
# wrong field whenever a ward or building name itself contains a hyphen.
ward_location_df = (
    movement_df[["ward_id", "building", "place"]]
    .drop_duplicates(subset="ward_id")
    .sort_values(by="ward_id")
)

wardid2building = dict(zip(ward_location_df["ward_id"], ward_location_df["building"]))
wardid2site     = dict(zip(ward_location_df["ward_id"], ward_location_df["place"]))

# Attach building / site to the ward size table.
ward_size_df["building"] = ward_size_df["ward_id"].map(wardid2building)
ward_size_df["site"]     = ward_size_df["ward_id"].map(wardid2site)

  ward_size_df                 = ward_size_df.groupby(["date", "ward", "ward_id"]).sum()[["num_patients"]].reset_index().drop(columns=["date"])


In [24]:
# Wards with num_patients below 2: take the first 30 rows, then order them by
# building name (descending) and show only the location columns.
(
    ward_size_df
    .loc[ward_size_df["num_patients"] < 2]
    .head(30)
    .sort_values(by="building", ascending=False)
    .loc[:, ["ward", "building", "site"]]
)

Unnamed: 0,ward,building,site
200,Vcl 3 Col Iictr,Vanderbilt Clinic,Columbia
103,Mil 6 Hudson North,Mschony,Chony
60,Hrt Cardiac Care,Mschony,Chony
97,Mil 6 Garden North,Mschony,Chony
80,Mil 4 Micu A,Mschony,Chony
73,Mil 1 Ip Dialysis,Milstein Hospital,Columbia
64,Hrt Myrna Daniels Infusion,Milstein Hospital,Columbia
61,Hrt Cardio Thoracic,Milstein Family Heart Ctr,Columbia
214,Zzmil 9 Hudson South,Milstein Family Heart Ctr,Columbia
130,Mil Operating Room,Milstein Family Heart Ctr,Columbia


In [33]:
# Buildings tracked individually, plus the catch-all label "Rest".
selected_buildings = ['Allen Hospital', 'Harkness Pavilion', 'Milstein Hospital', 'Mschony', 'Presbyterian Hospital', "Rest"]

# Building name -> integer id (its position in selected_buildings).
# "Rest" is the last entry, so it already gets id 5 here. The former extra
# assignment `building2id[5] = "Rest"` added an inverted int -> str entry to
# this name -> id mapping and has been removed.
building2id = {building_name: building_idx for building_idx, building_name in enumerate(selected_buildings)}

def building2building(building, known_buildings=None, fallback="Rest"):
    """Return ``building`` if it is a known building, otherwise the fallback label.

    Parameters
    ----------
    building : str
        Building name to classify.
    known_buildings : collection of str, optional
        Names to keep unchanged. When omitted, the module-level
        ``selected_buildings`` list is used, so existing one-argument calls
        behave as before.
    fallback : str, optional
        Label returned for any building not in ``known_buildings``.
        Defaults to ``"Rest"``.

    Returns
    -------
    str
        ``building`` itself or ``fallback``.
    """
    if known_buildings is None:
        # Resolved at call time, exactly as the one-argument version did.
        known_buildings = selected_buildings
    return building if building in known_buildings else fallback

# Collapse every building that is not individually tracked into "Rest".
ward_size_df["building_2"] = ward_size_df["building"].apply(building2building)

# The 20 wards with the highest num_patients among those labelled "Rest".
(
    ward_size_df
    .loc[ward_size_df["building_2"] == "Rest"]
    .sort_values(by="num_patients", ascending=False)
    .head(20)
    .loc[:, ["ward", "building", "site", "num_patients"]]
)

Unnamed: 0,ward,building,site,num_patients
63,Hrt Myrna Daniels Infusion,Milstein Family Heart Ctr,Columbia,88.151515
41,Hip 14 Adult Infusion,Herbert Irving Pavilion,Columbia,59.136029
56,Hrt 4 Col Proc Card Med,Milstein Family Heart Ctr,Columbia,51.944637
45,Hip 7 Col Ped Hem Onc Bmt,Herbert Irving Pavilion,Columbia,24.215613
199,Vc Col Path Lab Outreach,Vanderbilt Clinic,Columbia,16.131488
47,Hip 9 Research Infusion,Herbert Irving Pavilion,Columbia,15.842697
49,Hip Laboratory Grdn,Herbert Irving Pavilion,Columbia,12.867925
33,Avn Mammography,Avon,Columbia,10.712598
43,Hip 5 Col Proc Vasc Lab,Herbert Irving Pavilion,Columbia,9.217391
38,Eye Surgery,Eye Institute,Columbia,8.425974
