# Length of stay in ED

Look at the length of stay in emergency department for all cases.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
import seaborn as sns 
from scipy import stats

In [2]:
# adjust this to hide ID columns for posting to github
forrepo=False

hidecols = []
if forrepo:
    hidecols=['anon_id', 'pat_enc_csn_id_coded', 'inpatient_data_id_coded'] # these are hidden columns

In [34]:
# view data frame

# adjust this to hide ID columns for posting to github
forrepo=False

def view_df(df):
    hidecols = []
    if forrepo:
        hidecols=['anon_id', 'pat_enc_csn_id_coded', 'inpatient_data_id_coded'] # these are hidden columns
        
    return(df.drop(hidecols, axis=1, errors='ignore').head())

Pulled adt table from sch_core_2021 for the 6_7 cohort using the following SQL:

```select * 
    from `som-nero-phi-jonc101.shc_core_2021.adt` as adt
    where adt.pat_enc_csn_id_coded 
    in (
        select pat_enc_csn_id_coded from 
            `som-nero-phi-jonc101.triageTD.6_7_cohort4`
    )```

In [156]:
# read in data files
datadir = "../../DataTD/shc2021"
outdir = "../../OutputTD/shc2021"

adt_file = datadir + "/cohort3_adt_2021.csv"
# new_adt_file = datadir + "/cohort_6_7_adt.csv"
# results_file = resultsdir + "1_4_cohort_test_results.csv"
cohort_file = "../../OutputTD/shc2021/7_cohort4_3hr_labels_noOR.csv"


adt = pd.read_csv(adt_file)
# adt_2021 = pd.read_csv(new_adt_file)
# results = pd.read_csv(results_file)
cohort = pd.read_csv(cohort_file)

In [157]:
# just checking the cohorts
csns = cohort.pat_enc_csn_id_coded.unique()
adt_csns = adt.pat_enc_csn_id_coded.unique()

print("cohort csns: {}".format(len(csns)))
print("adt csns: {}".format(len(adt_csns)))

adt = adt[adt.pat_enc_csn_id_coded.isin(cohort.pat_enc_csn_id_coded)]
print(adt.pat_enc_csn_id_coded.nunique())

cohort csns: 52532
adt csns: 61176
52532


# Format data

Convert datetime columns to the correct format.

In [68]:
adt.columns

Index(['anon_id', 'pat_enc_csn_id_coded', 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', 'department_id'],
      dtype='object')

In [72]:
# add year of admit
adt['effective_year'] = pd.DatetimeIndex(adt['effective_time_jittered_utc']).year
adt.drop(hidecols, axis=1, errors='ignore').head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year
0,JC2210295,131259986092,2018-12-13 07:59:00+00:00,12,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Census,Cystic Fibrosis Adult,2000238.0,2018
1,JC1259166,131251987857,2018-07-05 06:59:00+00:00,21,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Census,Pulmonary Hypertension,2000237.0,2018
2,JC1259166,131251987857,2018-07-03 23:20:00+00:00,19,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Patient Update,Pulmonary Hypertension,2000237.0,2018
3,JC652959,131293920824,2020-08-17 05:59:00+00:00,24,Inpatient,,8.0,Critical Care,Transfer In,Neurocritical Care,6001003.0,2020
4,JC652959,131293920824,2020-08-16 22:43:00+00:00,19,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Patient Update,Neurocritical Care,110100014.0,2020


In [71]:
# change the effective time to datetime since read in from csv
adt.effective_time_jittered_utc = pd.to_datetime(adt.effective_time_jittered_utc)
print(adt.pat_enc_csn_id_coded.nunique())

52532


In [73]:
# add the admit time column from the prediction results csv to the adt csv
admit_time = cohort[['pat_enc_csn_id_coded', 'admit_time']]

adt_admit = adt.merge(admit_time, how='left')
print(adt.pat_enc_csn_id_coded.nunique())
# adt.head()

adt_admit.admit_time = pd.to_datetime(adt_admit.admit_time, utc=True)

adt_admit.columns

52532


Index(['anon_id', 'pat_enc_csn_id_coded', 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', 'department_id',
       'effective_year', 'admit_time'],
      dtype='object')

In [74]:
# do we still have all of the CSNs? - YES
adt_admit.pat_enc_csn_id_coded.nunique()

52532

In [76]:
adt_admit.drop(hidecols, axis=1, errors='ignore').head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time
0,JC2210295,131259986092,2018-12-13 07:59:00+00:00,12,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Census,Cystic Fibrosis Adult,2000238.0,2018,2018-12-11 04:40:00+00:00
1,JC1259166,131251987857,2018-07-05 06:59:00+00:00,21,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Census,Pulmonary Hypertension,2000237.0,2018,2018-06-27 20:37:00+00:00
2,JC1259166,131251987857,2018-07-03 23:20:00+00:00,19,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Patient Update,Pulmonary Hypertension,2000237.0,2018,2018-06-27 20:37:00+00:00
3,JC652959,131293920824,2020-08-17 05:59:00+00:00,24,Inpatient,,8.0,Critical Care,Transfer In,Neurocritical Care,6001003.0,2020,2020-08-11 16:05:00+00:00
4,JC652959,131293920824,2020-08-16 22:43:00+00:00,19,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Patient Update,Neurocritical Care,110100014.0,2020,2020-08-11 16:05:00+00:00


In [77]:
# compute the time before admit for each of these events
adt_admit['time_before_admit'] = adt_admit.apply(lambda x: x.admit_time - x.effective_time_jittered_utc, axis=1)
print(adt_admit.pat_enc_csn_id_coded.nunique())
adt_admit.drop(hidecols, axis=1, errors='ignore').head()

52532


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit
0,JC2210295,131259986092,2018-12-13 07:59:00+00:00,12,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Census,Cystic Fibrosis Adult,2000238.0,2018,2018-12-11 04:40:00+00:00,-3 days +20:41:00
1,JC1259166,131251987857,2018-07-05 06:59:00+00:00,21,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Census,Pulmonary Hypertension,2000237.0,2018,2018-06-27 20:37:00+00:00,-8 days +13:38:00
2,JC1259166,131251987857,2018-07-03 23:20:00+00:00,19,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Patient Update,Pulmonary Hypertension,2000237.0,2018,2018-06-27 20:37:00+00:00,-7 days +21:17:00
3,JC652959,131293920824,2020-08-17 05:59:00+00:00,24,Inpatient,,8.0,Critical Care,Transfer In,Neurocritical Care,6001003.0,2020,2020-08-11 16:05:00+00:00,-6 days +10:06:00
4,JC652959,131293920824,2020-08-16 22:43:00+00:00,19,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Patient Update,Neurocritical Care,110100014.0,2020,2020-08-11 16:05:00+00:00,-6 days +17:22:00


We want to find the length of stay in the ED, so we'll add some columns to help figure out which ADT events mark the admission to Emergency services and transfer to Inpatient.

We're looking for:

- the first Inpatient event that follow Emergency Services (should be admit time)
- the first Emergency Services event

In [78]:
# mark the events with some labels that make things easier later

# sort by csn and time
adt_admit.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc'], inplace=True)

# **assumes dataframe is sorted by time

# the first inpatient event following Emergency Services must meet these three criteria:
    # mark whether previous entry had pat class Emergency Services
adt_admit['prev_emerg'] = adt_admit.pat_class.shift() == 'Emergency Services'
    # mark whether current event has pat class Inpatient
adt_admit['curr_inpatient'] = adt_admit.pat_class == 'Inpatient'
    # mark whether current event is continued csn as previous
adt_admit['continued'] = adt_admit.pat_enc_csn_id_coded.eq(
    adt_admit.pat_enc_csn_id_coded.shift())

# find our cases by taking the AND of these columns
cols = ['prev_emerg', 'curr_inpatient', 'continued']
adt_admit['first_ip'] = adt_admit[cols].all(axis=1)

print(adt_admit.pat_enc_csn_id_coded.nunique())
adt_admit.drop(hidecols, axis=1, errors='ignore').head()

52532


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False
283906,JC1170548,131062572931,2015-01-02 03:09:00+00:00,2,Emergency Services,,,,Transfer Out,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 00:39:00,True,False,True,False
283445,JC1170548,131062572931,2015-01-02 03:09:00+00:00,3,Emergency Services,,,,Transfer In,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 00:39:00,True,False,True,False
283031,JC1170548,131062572931,2015-01-02 03:48:00+00:00,4,Emergency Services,,,,Transfer Out,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,False,True,False
887228,JC1170548,131062572931,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),2000237.0,2015,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,True,True,True


Find the first emergency services pat_class for each csn.

In [79]:
# start by taking events that have a different pat class from the next event (below it)
change_pat = adt_admit[~adt_admit.pat_class.eq(adt_admit.pat_class.shift())]
print(change_pat.pat_enc_csn_id_coded.nunique())
change_pat.drop(hidecols, axis=1, errors='ignore').head()

52532


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False
887228,JC1170548,131062572931,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),2000237.0,2015,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,True,True,True
354402,JC913990,131062745090,2015-01-02 01:56:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 05:53:00+00:00,0 days 03:57:00,False,False,False,False
765487,JC913990,131062745090,2015-01-02 05:53:00+00:00,9,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery,2000250.0,2015,2015-01-02 05:53:00+00:00,0 days 00:00:00,True,True,True,True
307756,JC529112,131062927111,2015-01-04 18:13:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-05 03:20:00+00:00,0 days 09:07:00,False,False,False,False


In [83]:
# keep the event with pat_class == Emergency Services, that occurs before the first inpatient event
# check to make sure that we're looking at the same CSN between both events
# don't take any where admit ocurred before ER event - happens when people move around a lot (ER -> Inpatient -> ER -> Inpatient)
keep_visits = change_pat[(change_pat.pat_class == 'Emergency Services') & 
                         (change_pat.shift(-1).first_ip) &
                         (change_pat.shift(-1).pat_enc_csn_id_coded == change_pat.pat_enc_csn_id_coded) & 
                         (change_pat.time_before_admit >= timedelta(days=0))] # remove this for now # ??? same results with this set

print(keep_visits.pat_enc_csn_id_coded.nunique())
keep_visits.drop(hidecols, axis=1, errors='ignore').head()

52530


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False
354402,JC913990,131062745090,2015-01-02 01:56:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 05:53:00+00:00,0 days 03:57:00,False,False,False,False
307756,JC529112,131062927111,2015-01-04 18:13:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-05 03:20:00+00:00,0 days 09:07:00,False,False,False,False
325130,JC1702404,131063006922,2015-01-06 09:04:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-06 14:47:00+00:00,0 days 05:43:00,False,False,False,False
284725,JC523028,131063022232,2015-01-03 14:51:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-03 21:24:00+00:00,0 days 06:33:00,False,False,False,False


In [20]:
# look at the number of csn's that were dropped becuase admit time was before first ER event
print(change_pat.pat_enc_csn_id_coded.nunique())

print(keep_visits.pat_enc_csn_id_coded.nunique())

# we lose csns where admit occurs before the first ER - lost 2

61176
52530


In [111]:
# check to see if all of the remaining rows are with seq=1
print(keep_visits.seq_num_in_enc.describe())
keep_visits[keep_visits['seq_num_in_enc'] > 1]

count    52530.000000
mean         1.000038
std          0.008726
min          1.000000
25%          1.000000
50%          1.000000
75%          1.000000
max          3.000000
Name: seq_num_in_enc, dtype: float64


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
264400,JC1926085,131246488352,2018-02-02 09:26:00+00:00,3,Emergency Services,3.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 15:14:00,False,False,True,False


In [123]:
# change_pat.dtypes

In [135]:
# no one has admit_time before ED, quickest admit
keep_visits.sort_values('time_before_admit').head(1)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
377391,JC2812249,131277235724,2019-10-22 00:02:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2019,2019-10-22 00:03:00+00:00,0 days 00:01:00,False,False,False,False


In [134]:
# no one has admit_time before ED, latest admit
keep_visits.sort_values('time_before_admit').tail(1)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
290406,JC1840822,131090609481,2015-07-02 20:22:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-07-10 18:48:00+00:00,7 days 22:26:00,False,False,False,False


In [136]:
# check the first patient who was admitted 1 min after presenting in ED
adt_admit[adt_admit['pat_enc_csn_id_coded'] == 131277235724].head(2)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
377391,JC2812249,131277235724,2019-10-22 00:02:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2019,2019-10-22 00:03:00+00:00,0 days 00:01:00,False,False,False,False
377293,JC2812249,131277235724,2019-10-22 00:03:00+00:00,2,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Emergency,2001002.0,2019,2019-10-22 00:03:00+00:00,0 days 00:00:00,True,True,True,True


In [114]:
# the only case with seq > 1 and it happned that patient was under obs before moved to emergency, 
# also pat_service is indication rather than pat_class
adt_admit[adt_admit['pat_enc_csn_id_coded'] == 131246488352].head(10)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
264480,JC1926085,131246488352,2018-02-02 00:01:00+00:00,1,Observation,2.0,,,Admission,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,1 days 00:39:00,False,False,False,False
264475,JC1926085,131246488352,2018-02-02 07:59:00+00:00,2,Observation,,,,Census,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 16:41:00,False,False,True,False
264400,JC1926085,131246488352,2018-02-02 09:26:00+00:00,3,Emergency Services,3.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 15:14:00,False,False,True,False
264272,JC1926085,131246488352,2018-02-02 10:28:00+00:00,4,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 14:12:00,True,False,True,False
264948,JC1926085,131246488352,2018-02-02 10:28:00+00:00,5,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 14:12:00,True,False,True,False
264309,JC1926085,131246488352,2018-02-02 10:51:00+00:00,6,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 13:49:00,True,False,True,False
264115,JC1926085,131246488352,2018-02-03 00:36:00+00:00,7,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 00:04:00,True,False,True,False
264276,JC1926085,131246488352,2018-02-03 00:36:00+00:00,8,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 00:04:00,True,False,True,False
264830,JC1926085,131246488352,2018-02-03 00:40:00+00:00,9,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency,2001002.0,2018,2018-02-03 00:40:00+00:00,0 days 00:00:00,True,False,True,False
869186,JC1926085,131246488352,2018-02-03 00:40:00+00:00,10,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,General Medicine (University),2000237.0,2018,2018-02-03 00:40:00+00:00,0 days 00:00:00,True,True,True,True


In [138]:
# 2 CSNs in keep_visits but not in change_pat 
change_pat[~change_pat['pat_enc_csn_id_coded'].isin(keep_visits['pat_enc_csn_id_coded'])]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
562177,JC1169632,131127789342,2015-10-31 05:38:00+00:00,4,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Neurosurgery,2001002.0,2015,2015-10-31 05:38:00+00:00,0 days,True,True,True,True
767003,JC1928643,131164384313,2015-12-28 21:41:00+00:00,2,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Orthopaedic Surgery,2001002.0,2015,2015-12-28 21:41:00+00:00,0 days,True,True,True,True


In [107]:
# a correct case
adt_admit[adt_admit['pat_enc_csn_id_coded'] == 131062572931].sort_values('seq_num_in_enc').head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False
283906,JC1170548,131062572931,2015-01-02 03:09:00+00:00,2,Emergency Services,,,,Transfer Out,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 00:39:00,True,False,True,False
283445,JC1170548,131062572931,2015-01-02 03:09:00+00:00,3,Emergency Services,,,,Transfer In,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 00:39:00,True,False,True,False
283031,JC1170548,131062572931,2015-01-02 03:48:00+00:00,4,Emergency Services,,,,Transfer Out,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,False,True,False
887228,JC1170548,131062572931,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),2000237.0,2015,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,True,True,True


In [116]:
# one of the 2 cases that were removed
# change_pat[change_pat['pat_enc_csn_id_coded'] == 131127789342].sort_values('seq_num_in_enc')
adt_admit[adt_admit['pat_enc_csn_id_coded'] == 131127789342].sort_values('seq_num_in_enc').head(4)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
334056,JC1169632,131127789342,2015-10-31 04:06:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-10-31 05:38:00+00:00,0 days 01:32:00,True,False,False,False
333938,JC1169632,131127789342,2015-10-31 04:06:00+00:00,2,Emergency Services,,,,Transfer Out,Emergency,2001002.0,2015,2015-10-31 05:38:00+00:00,0 days 01:32:00,True,False,True,False
333754,JC1169632,131127789342,2015-10-31 04:06:00+00:00,3,Emergency Services,,,,Transfer In,Emergency,2001002.0,2015,2015-10-31 05:38:00+00:00,0 days 01:32:00,True,False,True,False
562177,JC1169632,131127789342,2015-10-31 05:38:00+00:00,4,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Neurosurgery,2001002.0,2015,2015-10-31 05:38:00+00:00,0 days 00:00:00,True,True,True,True


In [144]:
change_pat[change_pat['pat_enc_csn_id_coded'] == 131127789342].sort_values('seq_num_in_enc').head(4)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
562177,JC1169632,131127789342,2015-10-31 05:38:00+00:00,4,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Neurosurgery,2001002.0,2015,2015-10-31 05:38:00+00:00,0 days,True,True,True,True


In [115]:
# one of the 2 cases that were removed
adt_admit[adt_admit['pat_enc_csn_id_coded'] == 131164384313].sort_values('seq_num_in_enc').head(3)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
367939,JC1928643,131164384313,2015-12-28 17:49:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-12-28 21:41:00+00:00,0 days 03:52:00,True,False,False,False
767003,JC1928643,131164384313,2015-12-28 21:41:00+00:00,2,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Orthopaedic Surgery,2001002.0,2015,2015-12-28 21:41:00+00:00,0 days 00:00:00,True,True,True,True
767049,JC1928643,131164384313,2015-12-29 00:03:00+00:00,3,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Transfer Out,Orthopaedic Surgery,2001002.0,2015,2015-12-28 21:41:00+00:00,-1 days +21:38:00,False,True,True,False


In [143]:
change_pat[change_pat['pat_enc_csn_id_coded'] == 131062572931].sort_values('seq_num_in_enc').head(4)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False
887228,JC1170548,131062572931,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),2000237.0,2015,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,True,True,True


In [145]:
# by pass change_pat since the above 2 cases were drop at the change_pat = adt_admit.... and then keep_visits = change_pat...
keep_visits2 = adt_admit[(adt_admit.seq_num_in_enc == 1)]

print(keep_visits2.pat_enc_csn_id_coded.nunique())
keep_visits2.drop(hidecols, axis=1, errors='ignore').head()

52532


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False
354402,JC913990,131062745090,2015-01-02 01:56:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 05:53:00+00:00,0 days 03:57:00,False,False,False,False
307756,JC529112,131062927111,2015-01-04 18:13:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-05 03:20:00+00:00,0 days 09:07:00,False,False,False,False
325130,JC1702404,131063006922,2015-01-06 09:04:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-06 14:47:00+00:00,0 days 05:43:00,False,False,False,False
284725,JC523028,131063022232,2015-01-03 14:51:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-03 21:24:00+00:00,0 days 06:33:00,False,False,False,False


### continue here keep_visits vs keep_visits2

In [146]:
# compute the time lapse between the ER visit and admission in hours
keep_visits['hours_before_admit'] = round(keep_visits.time_before_admit / np.timedelta64(1, 'h'), 2)
keep_visits.drop(hidecols, axis=1, errors='ignore').head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  keep_visits['hours_before_admit'] = round(keep_visits.time_before_admit / np.timedelta64(1, 'h'), 2)


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip,hours_before_admit
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False,9.13


In [147]:
# compute the time lapse between the ER visit and admission in hours
keep_visits2['hours_before_admit'] = round(keep_visits2.time_before_admit / np.timedelta64(1, 'h'), 2)
keep_visits2.drop(hidecols, axis=1, errors='ignore').head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  keep_visits2['hours_before_admit'] = round(keep_visits2.time_before_admit / np.timedelta64(1, 'h'), 2)


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,effective_year,admit_time,time_before_admit,prev_emerg,curr_inpatient,continued,first_ip,hours_before_admit
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015,2015-01-02 03:48:00+00:00,0 days 09:08:00,False,False,False,False,9.13


# Now look at time in ED

In [148]:
# print(keep_visits.columns)

print(keep_visits.hours_before_admit.describe())

keep_visits['hours_before_admit_quantile'] = pd.qcut(keep_visits.hours_before_admit, 10) # not working

count    52530.000000
mean         4.810179
std          6.415204
min          0.020000
25%          2.300000
50%          3.500000
75%          5.250000
max        190.430000
Name: hours_before_admit, dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  keep_visits['hours_before_admit_quantile'] = pd.qcut(keep_visits.hours_before_admit, 10)


In [151]:
# print(keep_visits2.columns)

# didn't change the summary stats
print(keep_visits2.hours_before_admit.describe())

# keep_visits2['hours_before_admit_quantile'] = pd.qcut(keep_visits2.hours_before_admit, 10)

count    52532.000000
mean         4.810278
std          6.415523
min          0.020000
25%          2.300000
50%          3.500000
75%          5.250000
max        190.430000
Name: hours_before_admit, dtype: float64


### check test set if available

### use keep_visits2 to make sure all cohort is included in here, keep_visits missed 2 patients

In [152]:
# save the hours before 
# final_visits = keep_visits[['pat_enc_csn_id_coded', 'hours_before_admit']]
final_visits = keep_visits2[['anon_id', 'pat_enc_csn_id_coded', 'effective_time_jittered_utc', 
                            'admit_time', 'time_before_admit', 'hours_before_admit']]
final_visits.rename({'hours_before_admit': 'hours_in_ed', 'effective_time_jittered_utc': 'ed_visit_time'}, axis=1, inplace=True)

print(final_visits.shape)
print(final_visits.pat_enc_csn_id_coded.nunique())

# final_visits.drop(hidecols, axis=1, errors='ignore').head()

(52532, 6)
52532


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [153]:
final_visits.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,admit_time,time_before_admit,hours_in_ed
283731,JC1170548,131062572931,2015-01-01 18:40:00+00:00,2015-01-02 03:48:00+00:00,0 days 09:08:00,9.13
354402,JC913990,131062745090,2015-01-02 01:56:00+00:00,2015-01-02 05:53:00+00:00,0 days 03:57:00,3.95
307756,JC529112,131062927111,2015-01-04 18:13:00+00:00,2015-01-05 03:20:00+00:00,0 days 09:07:00,9.12
325130,JC1702404,131063006922,2015-01-06 09:04:00+00:00,2015-01-06 14:47:00+00:00,0 days 05:43:00,5.72
284725,JC523028,131063022232,2015-01-03 14:51:00+00:00,2015-01-03 21:24:00+00:00,0 days 06:33:00,6.55


In [154]:
savefile = outdir + "/9_length_of_stay_in_ED.csv"

final_visits.to_csv(savefile, index=False)