In [1]:
from scipy.stats import truncnorm
import pandas as pd
import numpy as np
import itertools
import datetime
import tqdm
import sys
import os

def flatten_list(list_array):
    """Flatten one level of nesting.

    Takes an iterable of iterables and returns a single list holding the
    items of each inner iterable, in order.
    """
    return [item for inner in list_array for item in inner]

sys.path.insert(0,"../")
from global_config import config

# Base directories are looked up in the shared project configuration.
results_dir, data_dir, data_db_dir = (
    config.get_property(key) for key in ('results_dir', 'data_dir', 'data_db_dir')
)

# Directory under data_db_dir from which the ADT CSV is read.
feb_hosp_records_path = os.path.join(data_db_dir, 'long_files_8_25_2021')
# Presumably the output directory for this analysis; not used in the cells visible here.
path_to_save = os.path.join(results_dir, "real_testing", "community")

# import plotting stuff
from utils.plot_utils import *

# import data loading stuff
from utils.data_utils import load_movement_df

In [2]:
def capitalize_string_df(df_cap, col_cap):
    """Capitalise every space-separated word in one column of a DataFrame.

    Each value is converted with ``str``, split on single spaces, and every
    piece is lower-cased and then capitalised before being joined back with
    single spaces. Because of the ``str`` conversion, non-string values come
    out as the capitalised form of their string representation.

    Parameters
    ----------
    df_cap : pandas.DataFrame
        Frame to modify; the column is overwritten in place.
    col_cap : hashable
        Label of the column to rewrite.

    Returns
    -------
    pandas.DataFrame
        The same ``df_cap`` object that was passed in.
    """
    def _capitalize_words(value):
        words = str(value).split(' ')
        return ' '.join(word.lower().capitalize() for word in words)

    df_cap[col_cap] = df_cap[col_cap].map(_capitalize_words)
    return df_cap

# Date-like columns under their raw export names (four of them are renamed to date_* below).
# Not referenced again in the cells visible here; kept in case later cells use it.
dates_columns = ['covid_visits_start_date', 'adt_event_time', 'adt_ed_admission', 'adt_inpatient', 'adt_discharge']

# Load the ADT long file, label the numeric site codes and index the rows by mrn.
adt_df = pd.read_csv(os.path.join(feb_hosp_records_path, 'adt_long2021-08-25.csv'))
adt_df['adt_site_number_std'] = adt_df['adt_site_number_std'].replace({1: 'Columbia', 2: 'Allen', 3: 'Chony'})
adt_df = adt_df.drop(columns=['Unnamed: 0']).set_index('mrn')
adt_df["encounter_id"] = adt_df["encounter_id"].map(int)

# Shorter names for the location hierarchy and the date columns.
adt_df = adt_df.rename(columns={
    'adt_site_number_std': 'place',
    'adt_site':            'building',
    'adt_location':        'ward',
    'room_num':            'room',
    'adt_event_time':      'date_event',
    'adt_ed_admission':    'date_admission',
    'adt_inpatient':       'date_inpatient',
    'adt_discharge':       'date_discharge',
})

# Normalise the capitalisation of every location label.
for location_col in ('place', 'building', 'ward', 'room'):
    adt_df = capitalize_string_df(adt_df, location_col)

# Keep only rows with an admission date. The explicit copy makes the column
# assignments below write to an independent frame rather than a possible view.
adt_df = adt_df.dropna(subset=['date_admission']).copy()

# Replace ill-dates (2026 -> 2021 and 2022 -> 2021). The pattern is anchored so
# only a leading year is rewritten, and the .str accessor passes missing values
# through instead of raising on them.
for date_col in ("date_admission", "date_event"):
    adt_df[date_col] = adt_df[date_col].str.replace(r"^(?:2022|2026)", "2021", regex=True)

# Parse the date columns and truncate them to midnight.
for date_col in ("date_event", "date_admission", "date_inpatient", "date_discharge"):
    adt_df[date_col] = pd.to_datetime(adt_df[date_col], format='%Y-%m-%d').dt.normalize()

# Drop unreal admissions: anything on or after the date in the export file name.
adt_df = adt_df[adt_df["date_admission"] < pd.to_datetime("2021-08-25")]

# Drop the rows whose room is 'Pby Lab Outreach', then drop the room column
# itself, as we don't consider room as a scale.
adt_df = adt_df[adt_df["room"] != 'Pby Lab Outreach'].drop(columns=['room'])



In [3]:
# One row per distinct (building, place, ward, adt_care_level) combination;
# the mrn index is discarded.
ward_adt_care_level_df = (
    adt_df[["building", "place", "ward", "adt_care_level"]]
    .reset_index(drop=True)
    .drop_duplicates()
)

# Ward identifier of the form "<place>-<building>-<ward>". Built with vectorised
# string concatenation rather than a row-wise apply, which returns a DataFrame
# instead of a Series when the frame is empty.
ward_adt_care_level_df["ward_coded"] = (
    ward_adt_care_level_df["place"]
    + "-" + ward_adt_care_level_df["building"]
    + "-" + ward_adt_care_level_df["ward"]
)

# Distinct ward identifiers.
wards_df = ward_adt_care_level_df[["ward_coded"]].drop_duplicates()
wards_df

Unnamed: 0,ward_coded
0,Columbia-Milstein Hospital-Mil 5 Garden South
1,Columbia-26 Indian Rock-Rkl 26indrk Proc Card Chv
2,Columbia-Presbyterian Hospital-Pby Lab Outreach
3,Columbia-Milstein Hospital-Pby Adult Emergency
4,Columbia-Presbyterian Hospital-Pby 14 Transpla...
...,...
491232,Columbia-Milstein Family Heart Ctr-Mil 7 Hudso...
505567,Chony-Mschony-Mil 4 Micu A
512958,Columbia-Harkness Pavilion-Mil 8 Hudson South
517591,Columbia-Milstein Hospital-Law 3s Intensive Ca...


In [4]:
# Indicator column so the pivot below counts rows per (ward, care level).
ward_adt_care_level_df["value"] = 1

# Ward x care-level count table; combinations that never occur are filled with 0.
# aggfunc is given as the string 'sum' because passing np.sum emits a
# FutureWarning on recent pandas versions.
wards_df = pd.pivot_table(
    ward_adt_care_level_df,
    values='value',
    index=['ward_coded'],
    columns=['adt_care_level'],
    aggfunc='sum',
    fill_value=0,
).reset_index()

# Wards whose Outpatient count is exactly 1.
wards_outpatients_df = wards_df[wards_df["Outpatient"] == 1].set_index("ward_coded")

# Of those, wards whose counts across all care-level columns sum to 1,
# i.e. Outpatient is the only care level recorded for them.
wards_only_outpatients_df = wards_outpatients_df.loc[wards_outpatients_df.sum(axis=1) == 1]


In [7]:
# Each line reports len(subset) / len(reference) scaled by 100 (so the values
# are percentages), formatted with three decimals.
for label, subset, reference in (
    ("Outpatients proportion", wards_outpatients_df, wards_df),
    ("Only outpatients proportion", wards_only_outpatients_df, wards_df),
    ("Proportion of only outpatient in outpatients proportion", wards_only_outpatients_df, wards_outpatients_df),
):
    print("{} {:.3f}".format(label, 100 * len(subset) / len(reference)))



Outpatients proportion 43.210
Only outpatients proportion 19.342
Proportion of only outpatient in outpatients proportion 44.762
