In [1]:
from scipy.stats import truncnorm
import pandas as pd
import numpy as onp
import itertools
import datetime
import tqdm
import sys
import os

import jax.numpy as np

def flatten_list(list_array):
    return list(itertools.chain(*list_array))

sys.path.insert(0,"../")
from global_config import config

results_dir           = config.get_property('results_dir')
data_dir              = config.get_property('data_dir')
paper_dir             = config.get_property('paper_dir')
data_db_dir           = config.get_property('data_db_dir')
feb_hosp_records_path = os.path.join(data_db_dir, 'long_files_8_25_2021')
path_to_save          = os.path.join(results_dir, "real_testing", "community")


COLOR_LIST1 = ["#F8AFA8", "#FDDDA0", "#F5CDB4", "#74A089"]

In [2]:

path_to_data = os.path.join('..', 'data')

date_min            = pd.to_datetime("2020-02-01")
date_max            = pd.to_datetime("2021-02-28")

dates_simulation    = pd.date_range(start=date_min, end=date_max)
#patient_movement_df = pd.read_csv(os.path.join(path_to_data, 'patient_movement_2022-Nov.csv'), index_col=None)
#patient_movement_df = pd.read_csv(os.path.join(path_to_data, 'patient_movement_2023-Feb.csv'), index_col=None)
#patient_movement_df = pd.read_csv(os.path.join(path_to_data, 'patient_data_Feb_21_2023.csv'), index_col=None)

patient_df = pd.read_csv(os.path.join(data_db_dir, "long_files_8_25_2021", "patient_movement_2022-Nov.csv"), parse_dates=['date'])
patient_df = patient_df.drop_duplicates(['date','mrn'])

#duplicated_pos_tests = (patient_df[['mrn','organism_name']].duplicated() & ~patient_df['organism_name'].isnull())
duplicated_pos_tests = (patient_df[['encounter_id','organism_name']].duplicated() & ~patient_df['organism_name'].isnull())

patient_df.loc[duplicated_pos_tests,'test']          = 0
patient_df.loc[duplicated_pos_tests,'organism_name'] = np.nan


In [3]:
patient_movement_df = patient_df[["date", "first_day", "test", "encounter_id", "mrn", "place", "building", "ward", "type_discharge",
                                            "type_facility", "procedure_description", "specimen_group", "organism_name", "specimen"]]
patient_movement_df["ward_long"] = patient_movement_df.apply(lambda x: x["place"]+"-"+x["building"]+"-"+x["ward"], axis=1)
patient_movement_df["date"]      = pd.to_datetime(patient_movement_df["date"])

#####
ward2num            = patient_movement_df[["place", "building", "ward"]].drop_duplicates().reset_index(drop=True)
ward2num["ward"]    = ward2num.apply(lambda x: x["place"]+"-"+x["building"]+"-"+x["ward"], axis=1)
ward2num["ward_id"] = list(range(len(ward2num)))
ward2num            = {row.ward: row.ward_id for idx, row in ward2num.iterrows()}
#####

#####
num2ward            = {ward2num[kew_row]: kew_row  for kew_row in ward2num.keys()}
#####

#####
mrn2id            = patient_movement_df[["mrn"]].drop_duplicates().reset_index(drop=True)
mrn2id["mrn_id"]  = list(range(len(mrn2id)))
mrn2id            = {row.mrn: row.mrn_id for idx, row in mrn2id.iterrows()}
#####

patient_movement_df["mrn_id"]  = patient_movement_df["mrn"].map(mrn2id)
patient_movement_df["ward_id"] = patient_movement_df["ward_long"].map(ward2num)

patient_movement_df = patient_movement_df.drop(columns=["ward_long", "mrn"])
patient_movement_df = patient_movement_df[["date", "first_day", "test", "encounter_id", "mrn_id", "ward_id", "place", "building", "ward", "type_discharge",
                                            "type_facility", "procedure_description", "specimen_group", "organism_name", "specimen"]]

patient_movement_df["ward_total"] = patient_movement_df.apply(lambda x: x["place"]+"-"+x["building"]+"-"+x["ward"], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  patient_movement_df["ward_long"] = patient_movement_df.apply(lambda x: x["place"]+"-"+x["building"]+"-"+x["ward"], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  patient_movement_df["date"]      = pd.to_datetime(patient_movement_df["date"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pat

In [4]:
movement_df           = patient_movement_df.copy() # Movement between pair of wards
movement_df.first_day = 0

wards              = patient_movement_df.ward_total.unique()
counts_ward_df     = pd.DataFrame(columns=["date", "ward_total", "num_admitted",
                                                "num_discharged", "num_hospitalized", "num_tested", "num_outpatients"])

transfered_ward_df = pd.DataFrame(columns=["date", "ward_from", "ward_to","num_transfered"])


today         = dates_simulation[0]
today_df      = movement_df[movement_df["date"]==today]

for ward in wards:
    admitted_num     = len(today_df)
    discharged_num   = 0
    hospitalized     = today_df[today_df.ward_total==ward]
    hospitalized_num = hospitalized.shape[0]
    tested_num       = hospitalized[hospitalized.test==1].shape[0]
    outpatients_num  = hospitalized[hospitalized.type_facility=='Outpatient'].shape[0]
    counts_ward_df   = pd.concat([counts_ward_df,
                                    pd.DataFrame([[today, ward, admitted_num, discharged_num, hospitalized_num, tested_num, outpatients_num]],
                                    columns=["date", "ward", "num_admitted", "num_discharged", "num_hospitalized", "num_tested", "num_outpatients"])])


for date in tqdm.tqdm(dates_simulation[1:]):

    today     = pd.to_datetime(date)
    yesterday = pd.to_datetime(date) + datetime.timedelta(days=-1)

    today_df     = movement_df[movement_df["date"]==today]
    yesterday_df = movement_df[movement_df["date"]==yesterday]

    mrns_today     = today_df.mrn_id.unique()    # people that were in the hospital today
    mrns_yesterday = yesterday_df.mrn_id.unique() # people that were in the hospital yesterday

    mrns_today_and_yesterday = list(set(mrns_today) & set(mrns_yesterday)) # people that were in both days (i.e. they stayed in the hospital)

    discharged_df = yesterday_df[~yesterday_df.mrn_id.isin(mrns_today)]
    admitted_df   = today_df[~today_df.mrn_id.isin(mrns_yesterday)]

    movement_df.loc[admitted_df.index.values,'first_day'] = 1

    transfered_today_df     = today_df[today_df.mrn_id.isin(mrns_today_and_yesterday)]
    transfered_yesterday_df = yesterday_df[yesterday_df.mrn_id.isin(mrns_today_and_yesterday)]

    transfered_df           = pd.merge(transfered_yesterday_df[['mrn_id','ward_total']], transfered_today_df[['mrn_id','ward_total']], on='mrn_id')
    transfered_df           = transfered_df[transfered_df.ward_total_x!=transfered_df.ward_total_y]
    transfered_df           = transfered_df.groupby(['ward_total_x','ward_total_y']).count()
    transfered_df           = transfered_df.reset_index()
    transfered_df           = transfered_df.rename(columns={'ward_total_x':'ward_from','ward_total_y':'ward_to','mrn_id':'num_transfered'})
    transfered_df['date']   = date
    transfered_ward_df      = pd.concat([transfered_ward_df, transfered_df], ignore_index=True)

    for ward in wards:
        admitted_num     = admitted_df[admitted_df.ward_total==ward].shape[0]
        discharged_num   = discharged_df[discharged_df.ward_total==ward].shape[0]
        hospitalized     = today_df[today_df.ward_total==ward]
        hospitalized_num = hospitalized.shape[0]
        tested_num       = hospitalized[hospitalized.test==1].shape[0]
        outpatients_num  = hospitalized[hospitalized.type_facility=='Outpatient'].shape[0]
        counts_ward_df   = pd.concat([counts_ward_df,
                                        pd.DataFrame([[date, ward, admitted_num, discharged_num, hospitalized_num, tested_num, outpatients_num]],
                                        columns=["date", "ward", "num_admitted", "num_discharged", "num_hospitalized", "num_tested", "num_outpatients"])])



 36%|███▌      | 141/393 [11:19<37:07,  8.84s/it]

In [None]:
counts_ward_df.to_csv(     os.path.join(data_db_dir, "long_files_8_25_2021", "counts_ward.csv" ),   index=False)
transfered_ward_df.to_csv( os.path.join(data_db_dir, "long_files_8_25_2021", "transfers_ward.csv"), index=False)


In [None]:
counts_ward_df