# Error analysis
# Admission/Transfer/Discharge table with Trajectories

We want to look into the cases that have a large difference between the time0 predictions and time24 predictions.


We'll look at: the complete `1_4_cohort` here

- pat services
- pat lv of care
- patient trajectories from admission until 24hrs

Subgroups of the entire cohort will be analyzed in another notebook

In [21]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [22]:
# view data frame

# adjust this to hide ID columns for posting to github
forrepo=False

def view_df(df):
    hidecols = []
    if forrepo:
        hidecols=['anon_id', 'pat_enc_csn_id_coded', 'inpatient_data_id_coded'] # these are hidden columns
        
    return(df.drop(hidecols, axis=1, errors='ignore').head())

# Data

Load in the data

In [121]:
# read in data files
datadir = "../../DataTD/shc2021/"
outdir = "../../OutputTD/shc2021/"

adt_file = datadir + "cohort3_adt_2021.csv"
cohort_file = outdir + "7_cohort4_3hr_labels_noOR.csv"
los_file = outdir + "length_of_stay_in_ED.csv"

adt = pd.read_csv(adt_file)
cohort = pd.read_csv(cohort_file)
los = pd.read_csv(los_file)

In [101]:
adt.dtypes

anon_id                         object
pat_enc_csn_id_coded             int64
effective_time_jittered_utc     object
seq_num_in_enc                   int64
pat_class                       object
base_pat_class_c               float64
pat_lvl_of_care_c              float64
pat_lv_of_care                  object
event_type                      object
pat_service                     object
department_id                  float64
dtype: object

In [103]:
los.dtypes

anon_id                  object
pat_enc_csn_id_coded      int64
ed_visit_time            object
admit_time               object
time_before_admit        object
hours_in_ed             float64
dtype: object

In [107]:
cohort.dtypes.head()

anon_id                         object
pat_enc_csn_id_coded             int64
admit_time              datetime64[ns]
label_max3                       int64
label_3hr_recent                 int64
dtype: object

In [122]:
cohort.head(1)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,label_max3,label_3hr_recent,admit_label,has_admit_label,died_within_24hrs,death_3hr_max_label,death_3hr_recent_label,...,acute_to_critical_label_max_12hr,critical_to_acute_label_max_12hr,label_max24,label_24hr_recent,death_24hr_max_label,death_24hr_recent_label,acute_to_critical_label_recent_24hr,critical_to_acute_label_recent_24hr,acute_to_critical_label_max_24hr,critical_to_acute_label_max_24hr
0,JC1000116,131066472308,2015-01-28 00:46:00,0,0,0.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [154]:
adt.effective_time_jittered_utc = pd.to_datetime(adt.effective_time_jittered_utc)
cohort.admit_time = pd.to_datetime(cohort.admit_time)
los.admit_time = pd.to_datetime(los.admit_time)
los.ed_visit_time = pd.to_datetime(los.ed_visit_time)

# Bring in the ADT Table

We can use the ADT table to look at 

- pat_class
- pat_lv_of_care

In [160]:
# filter adt table down to test cohort
adt = adt[adt.pat_enc_csn_id_coded.isin(cohort.pat_enc_csn_id_coded)]

# these should match - it does!
print(adt.pat_enc_csn_id_coded.nunique())
print(adt.shape)

# view_df(adt)

52532
(982628, 11)


In [161]:
los.columns

Index(['anon_id', 'pat_enc_csn_id_coded', 'ed_visit_time', 'admit_time',
       'time_before_admit', 'hours_in_ed'],
      dtype='object')

In [162]:
adt_los = los.merge(adt, how='right', on=['anon_id', 'pat_enc_csn_id_coded'])
print(adt_los.pat_enc_csn_id_coded.nunique())
print(adt_los.shape)
adt_los.columns

52532
(982628, 15)


Index(['anon_id', 'pat_enc_csn_id_coded', 'ed_visit_time', 'admit_time',
       'time_before_admit', 'hours_in_ed', 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', 'department_id'],
      dtype='object')

In [163]:
adt_los.head(1)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,admit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id
0,JC2210295,131259986092,2018-12-11 02:46:00+00:00,2018-12-11 04:40:00+00:00,0 days 01:54:00,1.9,2018-12-13 07:59:00+00:00,12,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Census,Cystic Fibrosis Adult,2000238.0


In [164]:
cohort.head(1)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,label_max3,label_3hr_recent,admit_label,has_admit_label,died_within_24hrs,death_3hr_max_label,death_3hr_recent_label,...,acute_to_critical_label_max_12hr,critical_to_acute_label_max_12hr,label_max24,label_24hr_recent,death_24hr_max_label,death_24hr_recent_label,acute_to_critical_label_recent_24hr,critical_to_acute_label_recent_24hr,acute_to_critical_label_max_24hr,critical_to_acute_label_max_24hr
0,JC1000116,131066472308,2015-01-28 00:46:00,0,0,0.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [167]:
# add the admit time column from the prediction results csv to the adt csv
# adt_admit = adt_los.merge(cohort[['pat_enc_csn_id_coded']], how='left', on=['pat_enc_csn_id_coded'])

adt_admit = adt_los.drop(['admit_time'], axis=1).merge(cohort[['pat_enc_csn_id_coded', 'admit_time']], how='left', on=['pat_enc_csn_id_coded'])
adt_admit.admit_time = pd.to_datetime(adt_admit.admit_time, utc=True)

print(adt_admit.pat_enc_csn_id_coded.nunique())
print(adt_admit.shape)
adt_admit.columns
# view_df(adt_admit[['pat_enc_csn_id_coded', 'effective_time_jittered_utc', 
#                  'pat_class', 'admit_time']])

52532
(982628, 15)


Index(['anon_id', 'pat_enc_csn_id_coded', 'ed_visit_time', 'time_before_admit',
       'hours_in_ed', 'effective_time_jittered_utc', 'seq_num_in_enc',
       'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c', 'pat_lv_of_care',
       'event_type', 'pat_service', 'department_id', 'admit_time'],
      dtype='object')

In [168]:
adt_admit.dtypes

anon_id                                     object
pat_enc_csn_id_coded                         int64
ed_visit_time                  datetime64[ns, UTC]
time_before_admit                           object
hours_in_ed                                float64
effective_time_jittered_utc    datetime64[ns, UTC]
seq_num_in_enc                               int64
pat_class                                   object
base_pat_class_c                           float64
pat_lvl_of_care_c                          float64
pat_lv_of_care                              object
event_type                                  object
pat_service                                 object
department_id                              float64
admit_time                     datetime64[ns, UTC]
dtype: object

In [169]:
# compute the time since admit for each of these events
adt_admit['time_since_admit'] = adt_admit.apply(lambda x: x.effective_time_jittered_utc - x.admit_time, axis=1)
print(adt_admit.pat_enc_csn_id_coded.nunique())
# adt_admit.drop(hidecols, axis=1, errors='ignore').head()

52532


In [170]:
print(adt_admit.shape)
print(adt_admit.columns)
adt_admit.head(1)

(982628, 16)
Index(['anon_id', 'pat_enc_csn_id_coded', 'ed_visit_time', 'time_before_admit',
       'hours_in_ed', 'effective_time_jittered_utc', 'seq_num_in_enc',
       'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c', 'pat_lv_of_care',
       'event_type', 'pat_service', 'department_id', 'admit_time',
       'time_since_admit'],
      dtype='object')


Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,admit_time,time_since_admit
0,JC2210295,131259986092,2018-12-11 02:46:00+00:00,0 days 01:54:00,1.9,2018-12-13 07:59:00+00:00,12,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Census,Cystic Fibrosis Adult,2000238.0,2018-12-11 04:40:00+00:00,2 days 03:19:00


In [171]:
# mark the events with some labels that make things easier later
adt_admit = adt_admit.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc'])

# **assumes dataframe is sorted by time

# mark whether previous entry had pat class Emergency Services
adt_admit['prev_emerg'] = adt_admit.pat_class.shift() == 'Emergency Services'
# mark whether current event has pat class Inpatient
adt_admit['curr_inpatient'] = adt_admit.pat_class == 'Inpatient'
# mark whether current event is continued csn as previous
adt_admit['continued'] = adt_admit.pat_enc_csn_id_coded.eq(
    adt_admit.pat_enc_csn_id_coded.shift())

# find our cases by taking the AND of these columns
cols = ['prev_emerg', 'curr_inpatient', 'continued']
adt_admit['first_ip'] = adt_admit[cols].all(axis=1)

print(adt_admit.pat_enc_csn_id_coded.nunique())
print(adt_admit.shape)
view_df(adt_admit)

52532
(982628, 20)


Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip
616891,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,2001002.0,2015-01-02 03:48:00+00:00,-1 days +14:52:00,False,False,False,False
616892,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:09:00+00:00,2,Emergency Services,,,,Transfer Out,Emergency,2001002.0,2015-01-02 03:48:00+00:00,-1 days +23:21:00,True,False,True,False
616890,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:09:00+00:00,3,Emergency Services,,,,Transfer In,Emergency,2001002.0,2015-01-02 03:48:00+00:00,-1 days +23:21:00,True,False,True,False
616889,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,4,Emergency Services,,,,Transfer Out,Emergency,2001002.0,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,False,True,False
616893,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),2000237.0,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,True,True,True


# Look at first inpatient pat services

In [172]:
first_ips = adt_admit[adt_admit.first_ip == True]

print(first_ips.shape)
print(first_ips.pat_enc_csn_id_coded.nunique())
first_ips.pat_enc_csn_id_coded.nunique()

(52559, 20)
52532


52532

In [173]:
# find first inpatient pat service for each csn
first_ip_pat_service = first_ips[['pat_enc_csn_id_coded', 'pat_service']].drop_duplicates()

print(first_ip_pat_service.shape)
print(first_ip_pat_service.pat_enc_csn_id_coded.nunique())

(52538, 2)
52532


# multiple emergency-->inpatient changes

Some CSNs have multiple emergency-->inpatient changes. This means they went from emergency-->inpatient-->emergency-->inpatient.

Take a look at some of these. The first change from emergency-->inpatient would be the admit time. Keep this occurrence. This only happen for 10 CSNs.

In [174]:
counts = first_ip_pat_service.groupby('pat_enc_csn_id_coded').count().sort_values('pat_service')

multi_csns = counts[counts.pat_service > 1]

print(multi_csns.shape)
multi_csns

(6, 1)


Unnamed: 0_level_0,pat_service
pat_enc_csn_id_coded,Unnamed: 1_level_1
131230902129,2
131201950621,2
131164129413,2
131230527582,2
131176788268,2
131244958373,2


In [175]:
adt_admit.columns

Index(['anon_id', 'pat_enc_csn_id_coded', 'ed_visit_time', 'time_before_admit',
       'hours_in_ed', 'effective_time_jittered_utc', 'seq_num_in_enc',
       'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c', 'pat_lv_of_care',
       'event_type', 'pat_service', 'department_id', 'admit_time',
       'time_since_admit', 'prev_emerg', 'curr_inpatient', 'continued',
       'first_ip'],
      dtype='object')

In [176]:
adt_admit[adt_admit.pat_enc_csn_id_coded == 131230902129].sort_values('seq_num_in_enc')[
    ['anon_id', 'pat_enc_csn_id_coded', 'effective_time_jittered_utc', 'admit_time',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service'
       ]]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,admit_time,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service
773218,JC1947533,131230902129,2017-05-02 19:17:00+00:00,2017-05-02 20:14:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency
773215,JC1947533,131230902129,2017-05-02 20:14:00+00:00,2017-05-02 20:14:00+00:00,2,Inpatient,1.0,9.0,IICU/Intermediate Care (Assessment or interven...,Patient Update,Emergency
773219,JC1947533,131230902129,2017-05-02 21:57:00+00:00,2017-05-02 20:14:00+00:00,3,Emergency Services,,9.0,IICU/Intermediate Care (Assessment or interven...,Transfer Out,Emergency
773216,JC1947533,131230902129,2017-05-02 21:57:00+00:00,2017-05-02 20:14:00+00:00,4,Emergency Services,,9.0,IICU/Intermediate Care (Assessment or interven...,Transfer In,Emergency
773217,JC1947533,131230902129,2017-05-02 22:40:00+00:00,2017-05-02 20:14:00+00:00,5,Emergency Services,,9.0,IICU/Intermediate Care (Assessment or interven...,Transfer Out,Emergency
773220,JC1947533,131230902129,2017-05-02 22:40:00+00:00,2017-05-02 20:14:00+00:00,6,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Transfer In,Cardiology
773222,JC1947533,131230902129,2017-05-03 06:59:00+00:00,2017-05-02 20:14:00+00:00,7,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Census,Cardiology
773221,JC1947533,131230902129,2017-05-04 06:59:00+00:00,2017-05-02 20:14:00+00:00,8,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Census,Cardiology
773223,JC1947533,131230902129,2017-05-04 22:30:00+00:00,2017-05-02 20:14:00+00:00,9,Inpatient,,9.0,IICU/Intermediate Care (Assessment or interven...,Discharge,Cardiology Transplant


In [177]:
# keep the first occurrence of inpatient event
first_ips_keep = first_ips.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').first().reset_index()

# now we have one event per person
print(first_ips_keep.pat_enc_csn_id_coded.nunique())
print(first_ips_keep.shape)

52532
(52532, 20)


In [178]:
view_df(first_ips_keep)

Unnamed: 0,pat_enc_csn_id_coded,anon_id,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip
0,131062572931,JC1170548,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),2000237.0,2015-01-02 03:48:00+00:00,0 days,True,True,True,True
1,131062745090,JC913990,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,9,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery,2000250.0,2015-01-02 05:53:00+00:00,0 days,True,True,True,True
2,131062927111,JC529112,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-05 03:20:00+00:00,7,Inpatient,1.0,8.0,Critical Care,Transfer In,Critical Care,2000262.0,2015-01-05 03:20:00+00:00,0 days,True,True,True,True
3,131063006922,JC1702404,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-06 14:47:00+00:00,7,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),2000233.0,2015-01-06 14:47:00+00:00,0 days,True,True,True,True
4,131063022232,JC523028,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-03 21:24:00+00:00,5,Inpatient,1.0,8.0,Critical Care,Transfer In,Critical Care,2000262.0,2015-01-03 21:24:00+00:00,0 days,True,True,True,True


In [179]:
print(first_ips_keep.columns)

first_ips_save = first_ips_keep[['pat_enc_csn_id_coded', 'anon_id', 'admit_time', 'ed_visit_time', 
                                 'time_before_admit', 'hours_in_ed', 'effective_time_jittered_utc',
                                 'time_since_admit',
                                 'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
                                 'pat_lv_of_care', 'event_type', 'pat_service']]

view_df(first_ips_save)

Index(['pat_enc_csn_id_coded', 'anon_id', 'ed_visit_time', 'time_before_admit',
       'hours_in_ed', 'effective_time_jittered_utc', 'seq_num_in_enc',
       'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c', 'pat_lv_of_care',
       'event_type', 'pat_service', 'department_id', 'admit_time',
       'time_since_admit', 'prev_emerg', 'curr_inpatient', 'continued',
       'first_ip'],
      dtype='object')


Unnamed: 0,pat_enc_csn_id_coded,anon_id,admit_time,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,time_since_admit,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service
0,131062572931,JC1170548,2015-01-02 03:48:00+00:00,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,0 days,5,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University)
1,131062745090,JC913990,2015-01-02 05:53:00+00:00,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,0 days,9,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery
2,131062927111,JC529112,2015-01-05 03:20:00+00:00,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-05 03:20:00+00:00,0 days,7,Inpatient,1.0,8.0,Critical Care,Transfer In,Critical Care
3,131063006922,JC1702404,2015-01-06 14:47:00+00:00,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-06 14:47:00+00:00,0 days,7,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University)
4,131063022232,JC523028,2015-01-03 21:24:00+00:00,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-03 21:24:00+00:00,0 days,5,Inpatient,1.0,8.0,Critical Care,Transfer In,Critical Care


## Look at last Emergeny Services Pat Service

In [180]:
# mark the events with some labels that make things easier later

# **assumes dataframe is sorted by time

# mark whether next entry is inpatient
adt_admit['next_ip'] = adt_admit.pat_class.shift(-1) == 'Inpatient'
# mark whether current event has pat class Inpatient
adt_admit['curr_emerg'] = adt_admit.pat_class == 'Emergency Services'
# mark whether current event is continued csn as previous
adt_admit['not_continued'] = ~adt_admit.pat_enc_csn_id_coded.eq(
    adt_admit.pat_enc_csn_id_coded.shift(-1))

# find our cases by taking the AND of these columns
cols = ['next_ip', 'curr_emerg', 'continued']
adt_admit['last_emerg'] = adt_admit.curr_emerg & (adt_admit.next_ip | adt_admit.not_continued) 

adt_admit[['pat_enc_csn_id_coded', 'pat_class', 'last_emerg'] + cols].head(20)

Unnamed: 0,pat_enc_csn_id_coded,pat_class,last_emerg,next_ip,curr_emerg,continued
616891,131062572931,Emergency Services,False,False,True,False
616892,131062572931,Emergency Services,False,False,True,True
616890,131062572931,Emergency Services,False,False,True,True
616889,131062572931,Emergency Services,True,True,True,True
616893,131062572931,Inpatient,False,True,False,True
616894,131062572931,Inpatient,False,True,False,True
616895,131062572931,Inpatient,False,True,False,True
616896,131062572931,Inpatient,False,True,False,True
616897,131062572931,Inpatient,False,False,False,True
884832,131062745090,Emergency Services,False,False,True,False


In [181]:
last_emergs = adt_admit[adt_admit.last_emerg == True]

# take the first one for each csn
last_emergs_keep = last_emergs.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').first().reset_index()

# now we have one event per person
print(last_emergs_keep.pat_enc_csn_id_coded.nunique())
print(last_emergs_keep.shape)

view_df(last_emergs)

52532
(52532, 24)


Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,...,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg
616889,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,4,Emergency Services,,,...,2015-01-02 03:48:00+00:00,0 days,True,False,True,False,True,True,False,True
884839,JC913990,131062745090,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,8,Emergency Services,,,...,2015-01-02 05:53:00+00:00,0 days,True,False,True,False,True,True,False,True
704693,JC529112,131062927111,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-05 03:20:00+00:00,6,Emergency Services,,,...,2015-01-05 03:20:00+00:00,0 days,True,False,True,False,True,True,False,True
773455,JC1702404,131063006922,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-06 14:47:00+00:00,6,Emergency Services,,,...,2015-01-06 14:47:00+00:00,0 days,True,False,True,False,True,True,False,True
128420,JC523028,131063022232,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-03 21:24:00+00:00,4,Emergency Services,,,...,2015-01-03 21:24:00+00:00,0 days,True,False,True,False,True,True,False,True


In [182]:
print(last_emergs_keep.columns)

last_emerg_save = last_emergs_keep[['pat_enc_csn_id_coded', 'anon_id', 'admit_time', 'ed_visit_time', 
                                 'time_before_admit', 'hours_in_ed', 'effective_time_jittered_utc',
                                 'time_since_admit',
                                 'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
                                 'pat_lv_of_care', 'event_type', 'pat_service']]

view_df(last_emerg_save)

Index(['pat_enc_csn_id_coded', 'anon_id', 'ed_visit_time', 'time_before_admit',
       'hours_in_ed', 'effective_time_jittered_utc', 'seq_num_in_enc',
       'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c', 'pat_lv_of_care',
       'event_type', 'pat_service', 'department_id', 'admit_time',
       'time_since_admit', 'prev_emerg', 'curr_inpatient', 'continued',
       'first_ip', 'next_ip', 'curr_emerg', 'not_continued', 'last_emerg'],
      dtype='object')


Unnamed: 0,pat_enc_csn_id_coded,anon_id,admit_time,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,time_since_admit,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service
0,131062572931,JC1170548,2015-01-02 03:48:00+00:00,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,0 days,4,Emergency Services,,,,Transfer Out,Emergency
1,131062745090,JC913990,2015-01-02 05:53:00+00:00,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,0 days,8,Emergency Services,,,,Transfer Out,Emergency
2,131062927111,JC529112,2015-01-05 03:20:00+00:00,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-05 03:20:00+00:00,0 days,6,Emergency Services,,,,Transfer Out,Emergency
3,131063006922,JC1702404,2015-01-06 14:47:00+00:00,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-06 14:47:00+00:00,0 days,6,Emergency Services,,,,Transfer Out,Emergency
4,131063022232,JC523028,2015-01-03 21:24:00+00:00,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-03 21:24:00+00:00,0 days,4,Emergency Services,,,,Transfer Out,Emergency


# Pat Services Counts

Count the number of pat services before time 0 (admit to Inpatient)

In [183]:
# get events that occur before admit
adt_before_admit = adt_admit[adt_admit.time_since_admit <= timedelta(0)].sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc'])


# keep only those that change
# filter for those with pat_services that change from prior event & belong to same csn
change_care = adt_before_admit[~(adt_before_admit.pat_service.eq(adt_before_admit.pat_service.shift()) &
                         adt_before_admit.pat_enc_csn_id_coded.eq(adt_before_admit.pat_enc_csn_id_coded.shift()))]

view_df(change_care)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,...,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg
616891,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-01 18:40:00+00:00,1,Emergency Services,3.0,,...,2015-01-02 03:48:00+00:00,-1 days +14:52:00,False,False,False,False,False,True,False,False
616893,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,...,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,True,True,True,True,False,False,False
884832,JC913990,131062745090,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 01:56:00+00:00,1,Emergency Services,3.0,,...,2015-01-02 05:53:00+00:00,-1 days +20:03:00,False,False,False,False,False,True,False,False
884841,JC913990,131062745090,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,9,Inpatient,1.0,5.0,...,2015-01-02 05:53:00+00:00,0 days 00:00:00,True,True,True,True,True,False,False,False
704696,JC529112,131062927111,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-04 18:13:00+00:00,1,Emergency Services,3.0,,...,2015-01-05 03:20:00+00:00,-1 days +14:53:00,False,False,False,False,False,True,False,False


In [184]:
# get the trajectory of pat_service before admit and count the number of changes that occur

# get the trajectory for each csn
trajectory = change_care[['pat_enc_csn_id_coded', 'pat_service']].groupby('pat_enc_csn_id_coded')['pat_service'].apply(list).reset_index(name='trajectory')

view_df(trajectory)

Unnamed: 0,pat_enc_csn_id_coded,trajectory
0,131062572931,"[Emergency, General Medicine (University)]"
1,131062745090,"[Emergency, Orthopaedic Surgery]"
2,131062927111,"[Emergency, Critical Care]"
3,131063006922,"[Emergency, General Medicine (University)]"
4,131063022232,"[Emergency, Critical Care]"


In [185]:
# add a trajectory string column and also trajectory count
trajectory['trajectory_string'] = [' -> '.join(map(str, l)) for l in trajectory['trajectory']]
trajectory['trajectory_length'] = trajectory['trajectory'].str.len()

view_df(trajectory)

Unnamed: 0,pat_enc_csn_id_coded,trajectory,trajectory_string,trajectory_length
0,131062572931,"[Emergency, General Medicine (University)]",Emergency -> General Medicine (University),2
1,131062745090,"[Emergency, Orthopaedic Surgery]",Emergency -> Orthopaedic Surgery,2
2,131062927111,"[Emergency, Critical Care]",Emergency -> Critical Care,2
3,131063006922,"[Emergency, General Medicine (University)]",Emergency -> General Medicine (University),2
4,131063022232,"[Emergency, Critical Care]",Emergency -> Critical Care,2


In [186]:
# save this file
savefile = outdir + "10_pat_service_trajectory_before_admit.csv"
trajectory.to_csv(savefile, index=False)

# Pat lv of care

Look at pat lv of care when patients are admitted

In [187]:
adt_admit.columns

Index(['anon_id', 'pat_enc_csn_id_coded', 'ed_visit_time', 'time_before_admit',
       'hours_in_ed', 'effective_time_jittered_utc', 'seq_num_in_enc',
       'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c', 'pat_lv_of_care',
       'event_type', 'pat_service', 'department_id', 'admit_time',
       'time_since_admit', 'prev_emerg', 'curr_inpatient', 'continued',
       'first_ip', 'next_ip', 'curr_emerg', 'not_continued', 'last_emerg'],
      dtype='object')

In [188]:
# filter down to 24hrs after admit
# adt_admit['time_since_admit'] = adt_admit.effective_time_jittered_utc - adt_admit.admit_time

adt_results_24hr = adt_admit[(adt_admit.time_since_admit <= timedelta(hours=24)) &
                             (adt_admit.time_since_admit >= timedelta(hours=0))
                            ]

view_df(adt_results_24hr)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,...,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg
616889,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,4,Emergency Services,,,...,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,False,True,False,True,True,False,True
616893,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,...,2015-01-02 03:48:00+00:00,0 days 00:00:00,True,True,True,True,True,False,False,False
616894,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 07:59:00+00:00,6,Inpatient,,6.0,...,2015-01-02 03:48:00+00:00,0 days 04:11:00,False,True,True,False,True,False,False,False
884839,JC913990,131062745090,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,8,Emergency Services,,,...,2015-01-02 05:53:00+00:00,0 days 00:00:00,True,False,True,False,True,True,False,True
884841,JC913990,131062745090,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,9,Inpatient,1.0,5.0,...,2015-01-02 05:53:00+00:00,0 days 00:00:00,True,True,True,True,True,False,False,False


In [189]:
adt_admit[adt_admit.pat_enc_csn_id_coded == 131283158395][['pat_enc_csn_id_coded', 'effective_time_jittered_utc',
                                                          'seq_num_in_enc', 'pat_class', 'pat_lv_of_care',
                                                          'event_type', 'pat_service', 'admit_time',
                                                          'time_since_admit']]

Unnamed: 0,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,pat_lv_of_care,event_type,pat_service,admit_time,time_since_admit
224005,131283158395,2020-01-22 11:27:00+00:00,1,Emergency Services,,Admission,Emergency,2020-01-22 15:35:00+00:00,-1 days +19:52:00
224007,131283158395,2020-01-22 11:35:00+00:00,2,Emergency Services,,Transfer Out,Emergency,2020-01-22 15:35:00+00:00,-1 days +20:00:00
224006,131283158395,2020-01-22 11:35:00+00:00,3,Emergency Services,,Transfer In,Emergency,2020-01-22 15:35:00+00:00,-1 days +20:00:00
224011,131283158395,2020-01-22 12:46:00+00:00,4,Emergency Services,,Patient Update,Emergency Medicine,2020-01-22 15:35:00+00:00,-1 days +21:11:00
224008,131283158395,2020-01-22 15:35:00+00:00,5,Inpatient,Acute Care (Assessment or intervention q4-8),Patient Update,Emergency Medicine,2020-01-22 15:35:00+00:00,0 days 00:00:00
224012,131283158395,2020-01-22 17:32:00+00:00,6,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency Medicine,2020-01-22 15:35:00+00:00,0 days 01:57:00
224009,131283158395,2020-01-22 17:32:00+00:00,7,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer In,Emergency Medicine,2020-01-22 15:35:00+00:00,0 days 01:57:00
224010,131283158395,2020-01-22 17:39:00+00:00,8,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency Medicine,2020-01-22 15:35:00+00:00,0 days 02:04:00
224000,131283158395,2020-01-22 17:39:00+00:00,9,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer In,Medicine,2020-01-22 15:35:00+00:00,0 days 02:04:00
223994,131283158395,2020-01-23 07:59:00+00:00,10,Inpatient,Acute Care (Assessment or intervention q4-8),Census,Medicine,2020-01-22 15:35:00+00:00,0 days 16:24:00


# Adding some additional columns for length of stay in ED

In [190]:
discharge_event.dtypes

anon_id                                     object
pat_enc_csn_id_coded                         int64
ed_visit_time                               object
time_before_admit                           object
hours_in_ed                                float64
effective_time_jittered_utc    datetime64[ns, UTC]
seq_num_in_enc                               int64
pat_class                                   object
base_pat_class_c                           float64
pat_lvl_of_care_c                          float64
pat_lv_of_care                              object
event_type                                  object
pat_service                                 object
department_id                              float64
admit_time                     datetime64[ns, UTC]
time_since_admit                   timedelta64[ns]
prev_emerg                                    bool
curr_inpatient                                bool
continued                                     bool
first_ip                       

In [191]:
# add length from ED entry until discharge

# get time of discharge
# last_event = adt_ed.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').last().reset_index()

# print("\n", last_event.pat_service.value_counts())
# print("\n", last_event.event_type.value_counts())

# last_event[last_event.event_type == 'Census']

# let's use the actual discharge event instead

# USE adt_admit instead of adt_ed 
discharge_event = adt_admit[adt_admit.event_type == 'Discharge']

# make sure we keep all csns
print(adt_admit.pat_enc_csn_id_coded.nunique())
print(discharge_event.pat_enc_csn_id_coded.nunique())

print("\n", discharge_event.pat_service.value_counts())
print("\n", discharge_event.event_type.value_counts())

# not everyone has a Discharge event
missing_discharge = set(adt_admit.pat_enc_csn_id_coded) - set(discharge_event.pat_enc_csn_id_coded)
len(missing_discharge)

missing_discharge

discharge_event['discharge_time'] = discharge_event.effective_time_jittered_utc
discharge_event['length_of_ip_since_admit'] = discharge_event.time_since_admit
# ed_visit_time is first_ED_time 
discharge_event['length_from_ED_entry_until_discharge'] = discharge_event.discharge_time - discharge_event.ed_visit_time

discharge_event.columns


52532
52369

 Medicine                              12356
General Medicine (University)          8232
Cardiology                             3460
General Medicine (PAMF)                3422
Psychiatry                             3405
                                      ...  
Geriatrics                                1
Treatment/Procedure                       1
Outpatient Surgery at Redwood City        1
Hospice                                   1
Dentistry                                 1
Name: pat_service, Length: 77, dtype: int64

 Discharge    52369
Name: event_type, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  discharge_event['discharge_time'] = discharge_event.effective_time_jittered_utc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  discharge_event['length_of_ip_since_admit'] = discharge_event.time_since_admit
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  discharge_event['length_from_ED_entry_until_di

Index(['anon_id', 'pat_enc_csn_id_coded', 'ed_visit_time', 'time_before_admit',
       'hours_in_ed', 'effective_time_jittered_utc', 'seq_num_in_enc',
       'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c', 'pat_lv_of_care',
       'event_type', 'pat_service', 'department_id', 'admit_time',
       'time_since_admit', 'prev_emerg', 'curr_inpatient', 'continued',
       'first_ip', 'next_ip', 'curr_emerg', 'not_continued', 'last_emerg',
       'discharge_time', 'length_of_ip_since_admit',
       'length_from_ED_entry_until_discharge'],
      dtype='object')

In [193]:
# add the csns that were missing a discharge event
# USE adt_admit instead of adt_ed, and ed_visit_time instead of first_ED_time
cns_cols = adt_admit[['anon_id', 'pat_enc_csn_id_coded', 'admit_time', 
       'ed_visit_time']].drop_duplicates() #

all_csns = cns_cols.merge(discharge_event, how='left')
view_df(all_csns)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,...,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge
0,JC1170548,131062572931,2015-01-02 03:48:00+00:00,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-03 21:52:00+00:00,9.0,Inpatient,,...,True,True,False,False,False,True,False,2015-01-03 21:52:00+00:00,1 days 18:04:00,2 days 03:12:00
1,JC913990,131062745090,2015-01-02 05:53:00+00:00,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-06 02:00:00+00:00,25.0,Inpatient,,...,True,True,False,False,False,True,False,2015-01-06 02:00:00+00:00,3 days 20:07:00,4 days 00:04:00
2,JC529112,131062927111,2015-01-05 03:20:00+00:00,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-09 00:22:00+00:00,16.0,Inpatient,,...,True,True,False,False,False,True,False,2015-01-09 00:22:00+00:00,3 days 21:02:00,4 days 06:09:00
3,JC1702404,131063006922,2015-01-06 14:47:00+00:00,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-19 23:12:00+00:00,34.0,Inpatient,,...,True,True,False,False,False,True,False,2015-01-19 23:12:00+00:00,13 days 08:25:00,13 days 14:08:00
4,JC523028,131063022232,2015-01-03 21:24:00+00:00,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-06 22:02:00+00:00,13.0,Inpatient,,...,True,True,False,False,False,True,False,2015-01-06 22:02:00+00:00,3 days 00:38:00,3 days 07:11:00


In [195]:
print(all_csns.pat_enc_csn_id_coded.nunique())
all_csns.columns

52532


Index(['anon_id', 'pat_enc_csn_id_coded', 'admit_time', 'ed_visit_time',
       'time_before_admit', 'hours_in_ed', 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', 'department_id',
       'time_since_admit', 'prev_emerg', 'curr_inpatient', 'continued',
       'first_ip', 'next_ip', 'curr_emerg', 'not_continued', 'last_emerg',
       'discharge_time', 'length_of_ip_since_admit',
       'length_from_ED_entry_until_discharge'],
      dtype='object')

In [198]:
# keep just columns that we care about
length_of_stay_labels = all_csns[['anon_id', 'pat_enc_csn_id_coded', 'admit_time',
                                  'ed_visit_time', 'time_before_admit', 'hours_in_ed', 
                                  'discharge_time', 'length_of_ip_since_admit',
                                  'length_from_ED_entry_until_discharge']].drop_duplicates()
# view_df(length_of_stay_labels)

In [199]:
length_of_stay_labels.head(3)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,ed_visit_time,time_before_admit,hours_in_ed,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge
0,JC1170548,131062572931,2015-01-02 03:48:00+00:00,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-03 21:52:00+00:00,1 days 18:04:00,2 days 03:12:00
1,JC913990,131062745090,2015-01-02 05:53:00+00:00,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-06 02:00:00+00:00,3 days 20:07:00,4 days 00:04:00
2,JC529112,131062927111,2015-01-05 03:20:00+00:00,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-09 00:22:00+00:00,3 days 21:02:00,4 days 06:09:00


In [200]:
# add death during same visit column

# join the death dates, NaN means they didn't die
deaths19 = pd.read_csv("{}/cohort3_demo_deaths_2021.csv".format(datadir))
deaths19.head()

length_of_stay_death = length_of_stay_labels.merge(deaths19, how='left')
length_of_stay_death.death_date_jittered = pd.to_datetime(length_of_stay_death.death_date_jittered, utc=True)
length_of_stay_death['death_datetime'] = length_of_stay_death.death_date_jittered
length_of_stay_death.death_date_jittered = length_of_stay_death.death_date_jittered.dt.date
length_of_stay_death.head()

# find events where death date occur before discharge time
length_of_stay_death['discharge_date'] = length_of_stay_death.discharge_time.dt.date
died_during_stay = length_of_stay_death[length_of_stay_death.death_date_jittered < length_of_stay_death.discharge_date]
print(died_during_stay.pat_enc_csn_id_coded.nunique())
died_during_stay.head()

died_during_stay['died_before_discharge'] = True
died_during_stay.head()

# if died before dishcarge, make end date == death date
died_during_stay['end_date'] = died_during_stay.death_date_jittered


full_labels = length_of_stay_death.merge(died_during_stay, how='left')


544


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  died_during_stay['died_before_discharge'] = True
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  died_during_stay['end_date'] = died_during_stay.death_date_jittered


In [201]:
view_df(full_labels)
# view_df(full_labels[full_labels.died_before_discharge == True])

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,ed_visit_time,time_before_admit,hours_in_ed,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge,death_date_jittered,death_datetime,discharge_date,died_before_discharge,end_date
0,JC1170548,131062572931,2015-01-02 03:48:00+00:00,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-03 21:52:00+00:00,1 days 18:04:00,2 days 03:12:00,NaT,NaT,2015-01-03,,
1,JC913990,131062745090,2015-01-02 05:53:00+00:00,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-06 02:00:00+00:00,3 days 20:07:00,4 days 00:04:00,NaT,NaT,2015-01-06,,
2,JC529112,131062927111,2015-01-05 03:20:00+00:00,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-09 00:22:00+00:00,3 days 21:02:00,4 days 06:09:00,NaT,NaT,2015-01-09,,
3,JC1702404,131063006922,2015-01-06 14:47:00+00:00,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-19 23:12:00+00:00,13 days 08:25:00,13 days 14:08:00,2015-01-19,2015-01-19 00:00:00+00:00,2015-01-19,,
4,JC523028,131063022232,2015-01-03 21:24:00+00:00,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-06 22:02:00+00:00,3 days 00:38:00,3 days 07:11:00,NaT,NaT,2015-01-06,,


In [202]:
def set_end_date(row):
    # if discharge == NULL, death == NULL, then end_date = NULL
    if (pd.isnull(row.death_date_jittered) & pd.isnull(row.discharge_date)):
        return(None)
    # if death == NULL, then end_date = discharge date
    if (pd.isnull(row.death_date_jittered) & ~pd.isnull(row.discharge_date)):
        return(row.discharge_date)
    # if discharge == NULL, then end_date = death_date
    if (~pd.isnull(row.death_date_jittered) & pd.isnull(row.discharge_date)):
        return(row.death_date_jittered)
    # if discharge and death dates exist, take earlier
    if (~pd.isnull(row.death_date_jittered) & ~pd.isnull(row.discharge_date)):
        if (pd.isnull(row.died_before_discharge)):
            return(row.discharge_date)
        else:
            return(row.death_date_jittered)
    # should never reach here
    return(None)
    


full_labels['end_date'] = full_labels.apply(lambda row: set_end_date(row), axis=1)

In [203]:
# check each case
# view_df(full_labels[pd.isnull(full_labels.death_date_jittered) & pd.isnull(full_labels.discharge_date)])

# view_df(full_labels[pd.isnull(full_labels.death_date_jittered) & (~pd.isnull(full_labels.discharge_date))])

# view_df(full_labels[~pd.isnull(full_labels.death_date_jittered) & pd.isnull(full_labels.discharge_date)])

# view_df(full_labels[~pd.isnull(full_labels.death_date_jittered) & 
#         ~pd.isnull(full_labels.discharge_date) & 
#         pd.isnull(full_labels.died_before_discharge)])

view_df(full_labels[~pd.isnull(full_labels.death_date_jittered) & 
        ~pd.isnull(full_labels.discharge_date) & 
        ~pd.isnull(full_labels.died_before_discharge)])

# each case looks good, so continue

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,ed_visit_time,time_before_admit,hours_in_ed,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge,death_date_jittered,death_datetime,discharge_date,died_before_discharge,end_date
5,JC1577478,131063110906,2015-01-07 13:06:00+00:00,2015-01-07 06:11:00+00:00,0 days 06:55:00,6.92,2015-01-12 03:30:00+00:00,4 days 14:24:00,4 days 21:19:00,2015-01-11,2015-01-11 00:00:00+00:00,2015-01-12,True,2015-01-11
21,JC1742795,131063593641,2015-01-07 01:17:00+00:00,2015-01-06 18:39:00+00:00,0 days 06:38:00,6.63,2015-01-07 12:45:00+00:00,0 days 11:28:00,0 days 18:06:00,2015-01-06,2015-01-06 00:00:00+00:00,2015-01-07,True,2015-01-06
158,JC1290431,131064726530,2015-02-01 07:30:00+00:00,2015-01-31 05:22:00+00:00,1 days 02:08:00,26.13,2015-02-12 03:38:00+00:00,10 days 20:08:00,11 days 22:16:00,2015-02-11,2015-02-11 00:00:00+00:00,2015-02-12,True,2015-02-11
231,JC1307552,131065058683,2015-01-12 09:06:00+00:00,2015-01-11 19:14:00+00:00,0 days 13:52:00,13.87,2015-01-15 00:20:00+00:00,2 days 15:14:00,3 days 05:06:00,2015-01-14,2015-01-14 00:00:00+00:00,2015-01-15,True,2015-01-14
445,JC1158421,131066343470,2015-01-13 08:00:00+00:00,2015-01-13 02:57:00+00:00,0 days 05:03:00,5.05,2015-01-31 05:00:00+00:00,17 days 21:00:00,18 days 02:03:00,2015-01-30,2015-01-30 00:00:00+00:00,2015-01-31,True,2015-01-30


In [205]:
# create difference columns with dates instead
# USE ed_visit_time for first_ED_time
full_labels['length_of_ip_since_admit'] = full_labels.end_date - full_labels.admit_time.dt.date
full_labels['length_from_ED_entry_until_end_date'] = full_labels.end_date - full_labels.ed_visit_time.dt.date

print(full_labels.columns)
full_labels

Index(['anon_id', 'pat_enc_csn_id_coded', 'admit_time', 'ed_visit_time',
       'time_before_admit', 'hours_in_ed', 'discharge_time',
       'length_of_ip_since_admit', 'length_from_ED_entry_until_discharge',
       'death_date_jittered', 'death_datetime', 'discharge_date',
       'died_before_discharge', 'end_date',
       'length_from_ED_entry_until_end_date'],
      dtype='object')


Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,ed_visit_time,time_before_admit,hours_in_ed,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge,death_date_jittered,death_datetime,discharge_date,died_before_discharge,end_date,length_from_ED_entry_until_end_date
0,JC1170548,131062572931,2015-01-02 03:48:00+00:00,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-03 21:52:00+00:00,1 days,2 days 03:12:00,NaT,NaT,2015-01-03,,2015-01-03,2 days
1,JC913990,131062745090,2015-01-02 05:53:00+00:00,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-06 02:00:00+00:00,4 days,4 days 00:04:00,NaT,NaT,2015-01-06,,2015-01-06,4 days
2,JC529112,131062927111,2015-01-05 03:20:00+00:00,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-09 00:22:00+00:00,4 days,4 days 06:09:00,NaT,NaT,2015-01-09,,2015-01-09,5 days
3,JC1702404,131063006922,2015-01-06 14:47:00+00:00,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-19 23:12:00+00:00,13 days,13 days 14:08:00,2015-01-19,2015-01-19 00:00:00+00:00,2015-01-19,,2015-01-19,13 days
4,JC523028,131063022232,2015-01-03 21:24:00+00:00,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-06 22:02:00+00:00,3 days,3 days 07:11:00,NaT,NaT,2015-01-06,,2015-01-06,3 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52527,JC1728292,131320698064,2021-09-28 02:29:00+00:00,2021-09-28 00:00:00+00:00,,,NaT,NaT,NaT,NaT,NaT,NaT,,,NaT
52528,JC644080,131320709282,2021-09-29 04:32:00+00:00,2021-09-29 02:49:00+00:00,,,NaT,NaT,NaT,NaT,NaT,NaT,,,NaT
52529,JC620968,131320753559,2021-09-30 04:59:00+00:00,2021-09-29 20:44:00+00:00,,,NaT,NaT,NaT,NaT,NaT,NaT,,,NaT
52530,JC3000863,131320788661,2021-09-29 10:52:00+00:00,2021-09-29 06:30:00+00:00,,,NaT,NaT,NaT,NaT,NaT,NaT,,,NaT


In [206]:
# save the final labels
# USE ed_visit_time for ed_visit_time
keep_cols = ['anon_id', 'pat_enc_csn_id_coded', 'admit_time', 
             'ed_visit_time', 'time_before_admit', 'hours_in_ed',
             'discharge_time', 'end_date', 'length_of_ip_since_admit', 
             'length_from_ED_entry_until_end_date',
             'death_date_jittered', 'discharge_date', 'died_before_discharge',]
                                   

full_labels[keep_cols].to_csv(outdir + "10_time_features.csv", index=False)

In [212]:
# full_labels[full_labels.pat_enc_csn_id_coded == 131284409583]
full_labels.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,ed_visit_time,time_before_admit,hours_in_ed,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge,death_date_jittered,death_datetime,discharge_date,died_before_discharge,end_date,length_from_ED_entry_until_end_date
0,JC1170548,131062572931,2015-01-02 03:48:00+00:00,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-03 21:52:00+00:00,1 days,2 days 03:12:00,NaT,NaT,2015-01-03,,2015-01-03,2 days
1,JC913990,131062745090,2015-01-02 05:53:00+00:00,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-06 02:00:00+00:00,4 days,4 days 00:04:00,NaT,NaT,2015-01-06,,2015-01-06,4 days
2,JC529112,131062927111,2015-01-05 03:20:00+00:00,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-09 00:22:00+00:00,4 days,4 days 06:09:00,NaT,NaT,2015-01-09,,2015-01-09,5 days
3,JC1702404,131063006922,2015-01-06 14:47:00+00:00,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-19 23:12:00+00:00,13 days,13 days 14:08:00,2015-01-19,2015-01-19 00:00:00+00:00,2015-01-19,,2015-01-19,13 days
4,JC523028,131063022232,2015-01-03 21:24:00+00:00,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-06 22:02:00+00:00,3 days,3 days 07:11:00,NaT,NaT,2015-01-06,,2015-01-06,3 days


In [37]:
# find difference

In [210]:
print(full_labels.shape)
full_labels.pat_enc_csn_id_coded.nunique()

(52532, 15)


52532

# Back to pat lv of care labels

In [213]:
# filter to inpatient events after admit time
ip_events = adt_results_24hr[(adt_results_24hr.admit_time <= adt_results_24hr.effective_time_jittered_utc) &
                     (adt_results_24hr.pat_class == 'Inpatient')]

ip_events = ip_events[ip_events.pat_enc_csn_id_coded.isin(cohort.pat_enc_csn_id_coded)]

ip_events.pat_enc_csn_id_coded.nunique()

52532

In [214]:
# keep only change of lv of care status

# sort by sequence 
ip_events = ip_events.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc'])

# keep only those that change
change_care = ip_events[~(ip_events.pat_lv_of_care.eq(ip_events.pat_lv_of_care.shift()) &
                         ip_events.pat_enc_csn_id_coded.eq(ip_events.pat_enc_csn_id_coded.shift()))]

change_care.pat_enc_csn_id_coded.nunique()

52532

In [215]:
change_care.size

1461216

In [216]:
view_df(change_care[change_care.pat_enc_csn_id_coded.eq(change_care.shift().pat_enc_csn_id_coded)])

Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,...,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg
948286,JC626278,131064103797,2015-01-08 09:45:00+00:00,0 days 13:06:00,13.1,2015-01-09 11:40:00+00:00,12,Inpatient,,8.0,...,2015-01-08 22:51:00+00:00,0 days 12:49:00,False,True,True,False,True,False,False,False
449169,JC1729448,131064484335,2015-01-07 09:25:00+00:00,0 days 04:33:00,4.55,2015-01-08 07:43:00+00:00,7,Inpatient,,5.0,...,2015-01-07 13:58:00+00:00,0 days 17:45:00,False,True,True,False,True,False,False,False
832830,JC997878,131064522540,2015-01-11 19:34:00+00:00,0 days 03:07:00,3.12,2015-01-12 04:31:00+00:00,5,Inpatient,,6.0,...,2015-01-11 22:41:00+00:00,0 days 05:50:00,False,True,True,False,True,False,False,False
433780,JC1154492,131064558168,2015-01-10 12:32:00+00:00,0 days 11:47:00,11.78,2015-01-11 21:49:00+00:00,8,Inpatient,,5.0,...,2015-01-11 00:19:00+00:00,0 days 21:30:00,False,True,True,False,True,False,False,False
298796,JC1606131,131064616475,2015-01-28 00:35:00+00:00,0 days 03:54:00,3.9,2015-01-28 10:14:00+00:00,8,Inpatient,,6.0,...,2015-01-28 04:29:00+00:00,0 days 05:45:00,False,True,True,False,True,False,False,False


In [217]:
# get the trajectory for each csn
trajectory = change_care[['pat_enc_csn_id_coded', 'pat_lv_of_care']].groupby('pat_enc_csn_id_coded')['pat_lv_of_care'].apply(list).reset_index(name='trajectory')

view_df(trajectory)

Unnamed: 0,pat_enc_csn_id_coded,trajectory
0,131062572931,[Intermediate Care - With Cardiac Monitor]
1,131062745090,[Acute Care (Assessment or intervention q4-8)]
2,131062927111,[Critical Care]
3,131063006922,[Intermediate Care - With Cardiac Monitor]
4,131063022232,[Critical Care]


In [218]:
# add a trajectory string column and also trajectory count
trajectory['trajectory_string'] = [' -> '.join(map(str, l)) for l in trajectory['trajectory']]
trajectory['trajectory_length'] = trajectory['trajectory'].str.len()

In [219]:
view_df(trajectory[trajectory.trajectory_length > 1])

Unnamed: 0,pat_enc_csn_id_coded,trajectory,trajectory_string,trajectory_length
61,131064103797,"[Intermediate Care - No Cardiac Monitor, Criti...",Intermediate Care - No Cardiac Monitor -> Crit...,2
111,131064484335,"[Intermediate Care - With Cardiac Monitor, Acu...",Intermediate Care - With Cardiac Monitor -> Ac...,2
112,131064522540,"[Critical Care, Intermediate Care - With Cardi...",Critical Care -> Intermediate Care - With Card...,2
123,131064558168,"[Critical Care, Acute Care (Assessment or inte...",Critical Care -> Acute Care (Assessment or int...,2
135,131064616475,"[Acute Care (Assessment or intervention q4-8),...",Acute Care (Assessment or intervention q4-8) -...,2


In [221]:
# save this file
savefile = outdir + "10_trajectory.csv"
trajectory.to_csv(savefile, index=False)

# Combine the data into one dataframe

In [222]:
# format the first inpatient event
first_ip_less = first_ips_save[['anon_id', 'pat_enc_csn_id_coded',
       'pat_lv_of_care', 'event_type', 'pat_service']]

first_ip_less.rename({
                      'pat_service':'first_ip_pat_service',
                      'event_type':'first_ip_event_type',
                      'pat_lv_of_care':'first_ip_lv_of_care'
                     },
                    inplace=True, axis=1)

view_df(first_ip_less)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,anon_id,pat_enc_csn_id_coded,first_ip_lv_of_care,first_ip_event_type,first_ip_pat_service
0,JC1170548,131062572931,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University)
1,JC913990,131062745090,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery
2,JC529112,131062927111,Critical Care,Transfer In,Critical Care
3,JC1702404,131063006922,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University)
4,JC523028,131063022232,Critical Care,Transfer In,Critical Care


In [223]:
# format the last emerg event
last_emerg_less = last_emerg_save[['anon_id', 'pat_enc_csn_id_coded',
       'pat_lv_of_care', 'event_type', 'pat_service']]

last_emerg_less.rename({
                      'pat_service':'last_emerg_pat_service',
                      'event_type':'last_emerg_event_type',
                      'pat_lv_of_care':'last_emerg_lv_of_care'
                     },
                    inplace=True, axis=1)

view_df(last_emerg_less)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,anon_id,pat_enc_csn_id_coded,last_emerg_lv_of_care,last_emerg_event_type,last_emerg_pat_service
0,JC1170548,131062572931,,Transfer Out,Emergency
1,JC913990,131062745090,,Transfer Out,Emergency
2,JC529112,131062927111,,Transfer Out,Emergency
3,JC1702404,131063006922,,Transfer Out,Emergency
4,JC523028,131063022232,,Transfer Out,Emergency


In [224]:
trajectory_less = trajectory[['pat_enc_csn_id_coded', 'trajectory_string', 'trajectory_length']]

trajectory_less.rename({'trajectory_string':'trajectory'},
                    inplace=True, axis=1)

view_df(trajectory_less)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,pat_enc_csn_id_coded,trajectory,trajectory_length
0,131062572931,Intermediate Care - With Cardiac Monitor,1
1,131062745090,Acute Care (Assessment or intervention q4-8),1
2,131062927111,Critical Care,1
3,131063006922,Intermediate Care - With Cardiac Monitor,1
4,131063022232,Critical Care,1


In [225]:
ip_emerg = first_ip_less.merge(last_emerg_less, how='outer')
view_df(ip_emerg)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,first_ip_lv_of_care,first_ip_event_type,first_ip_pat_service,last_emerg_lv_of_care,last_emerg_event_type,last_emerg_pat_service
0,JC1170548,131062572931,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),,Transfer Out,Emergency
1,JC913990,131062745090,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery,,Transfer Out,Emergency
2,JC529112,131062927111,Critical Care,Transfer In,Critical Care,,Transfer Out,Emergency
3,JC1702404,131063006922,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),,Transfer Out,Emergency
4,JC523028,131063022232,Critical Care,Transfer In,Critical Care,,Transfer Out,Emergency


In [226]:
ip_emerg_traj = ip_emerg.merge(trajectory_less, how='outer')
view_df(ip_emerg_traj)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,first_ip_lv_of_care,first_ip_event_type,first_ip_pat_service,last_emerg_lv_of_care,last_emerg_event_type,last_emerg_pat_service,trajectory,trajectory_length
0,JC1170548,131062572931,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),,Transfer Out,Emergency,Intermediate Care - With Cardiac Monitor,1
1,JC913990,131062745090,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery,,Transfer Out,Emergency,Acute Care (Assessment or intervention q4-8),1
2,JC529112,131062927111,Critical Care,Transfer In,Critical Care,,Transfer Out,Emergency,Critical Care,1
3,JC1702404,131063006922,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (University),,Transfer Out,Emergency,Intermediate Care - With Cardiac Monitor,1
4,JC523028,131063022232,Critical Care,Transfer In,Critical Care,,Transfer Out,Emergency,Critical Care,1


In [232]:
# save this combined df
savefile = outdir + "10_combined_ip_emerg_traj.csv"
ip_emerg_traj.to_csv(savefile)

# Description of columns

- first_ip_* = labels for the first inpatient event (admit time)
- last_emerg_* = labels for the last emergency event (just before admit time)
- trajectory = CSNs movement through care levels from admit time to 24 hours after admit
- trajectory_length = number of times CSN moves around in trajectory

# Time of first level of care change

Get the time since admit for the first level of care change.

In [227]:
# these are inpatient events after the ED vist within 24 hours
# only looking at the first event for each level of care for each csn
print(change_care.pat_enc_csn_id_coded.nunique())
change_care.head()

52532


Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,...,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg
616893,JC1170548,131062572931,2015-01-01 18:40:00+00:00,0 days 09:08:00,9.13,2015-01-02 03:48:00+00:00,5,Inpatient,1.0,6.0,...,2015-01-02 03:48:00+00:00,0 days,True,True,True,True,True,False,False,False
884841,JC913990,131062745090,2015-01-02 01:56:00+00:00,0 days 03:57:00,3.95,2015-01-02 05:53:00+00:00,9,Inpatient,1.0,5.0,...,2015-01-02 05:53:00+00:00,0 days,True,True,True,True,True,False,False,False
704699,JC529112,131062927111,2015-01-04 18:13:00+00:00,0 days 09:07:00,9.12,2015-01-05 03:20:00+00:00,7,Inpatient,1.0,8.0,...,2015-01-05 03:20:00+00:00,0 days,True,True,True,True,True,False,False,False
773477,JC1702404,131063006922,2015-01-06 09:04:00+00:00,0 days 05:43:00,5.72,2015-01-06 14:47:00+00:00,7,Inpatient,1.0,6.0,...,2015-01-06 14:47:00+00:00,0 days,True,True,True,True,True,False,False,False
128424,JC523028,131063022232,2015-01-03 14:51:00+00:00,0 days 06:33:00,6.55,2015-01-03 21:24:00+00:00,5,Inpatient,1.0,8.0,...,2015-01-03 21:24:00+00:00,0 days,True,True,True,True,True,False,False,False


In [228]:
# only keep events that are not the first inpatient event
not_first = change_care[~change_care.first_ip]
not_first.head()
print(not_first.pat_enc_csn_id_coded.nunique())

# keep the first event for each csn
first_switch = not_first.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').head(1)
first_switch.head()

6615


Unnamed: 0,anon_id,pat_enc_csn_id_coded,ed_visit_time,time_before_admit,hours_in_ed,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,...,admit_time,time_since_admit,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg
948286,JC626278,131064103797,2015-01-08 09:45:00+00:00,0 days 13:06:00,13.1,2015-01-09 11:40:00+00:00,12,Inpatient,,8.0,...,2015-01-08 22:51:00+00:00,0 days 12:49:00,False,True,True,False,True,False,False,False
449169,JC1729448,131064484335,2015-01-07 09:25:00+00:00,0 days 04:33:00,4.55,2015-01-08 07:43:00+00:00,7,Inpatient,,5.0,...,2015-01-07 13:58:00+00:00,0 days 17:45:00,False,True,True,False,True,False,False,False
832830,JC997878,131064522540,2015-01-11 19:34:00+00:00,0 days 03:07:00,3.12,2015-01-12 04:31:00+00:00,5,Inpatient,,6.0,...,2015-01-11 22:41:00+00:00,0 days 05:50:00,False,True,True,False,True,False,False,False
433780,JC1154492,131064558168,2015-01-10 12:32:00+00:00,0 days 11:47:00,11.78,2015-01-11 21:49:00+00:00,8,Inpatient,,5.0,...,2015-01-11 00:19:00+00:00,0 days 21:30:00,False,True,True,False,True,False,False,False
298796,JC1606131,131064616475,2015-01-28 00:35:00+00:00,0 days 03:54:00,3.9,2015-01-28 10:14:00+00:00,8,Inpatient,,6.0,...,2015-01-28 04:29:00+00:00,0 days 05:45:00,False,True,True,False,True,False,False,False


In [229]:
first_switch['first_loc_change_minutes_since_admit'] = first_switch.time_since_admit.dt.total_seconds()/60
cleaned_df = first_switch[['pat_enc_csn_id_coded', 'first_loc_change_minutes_since_admit']]
cleaned_df

Unnamed: 0,pat_enc_csn_id_coded,first_loc_change_minutes_since_admit
948286,131064103797,769.0
449169,131064484335,1065.0
832830,131064522540,350.0
433780,131064558168,1290.0
298796,131064616475,345.0
...,...,...
914498,131320006823,729.0
185069,131320032591,228.0
248041,131320164220,1314.0
408116,131320338365,1012.0


In [231]:
cleaned_df['first_loc_change_minutes_since_admit'].describe() # on average 10 hours, sd 6.7 hours

count    6615.000000
mean      590.969312
std       452.769544
min         0.000000
25%       149.000000
50%       532.000000
75%       995.000000
max      1440.000000
Name: first_loc_change_minutes_since_admit, dtype: float64

In [230]:
savefile = outdir + "10_first_loc_change_time.csv"
cleaned_df.to_csv(savefile, index=False)