# Error analysis
# Admission/Transfer/Discharge table with Trajectories

We want to look into the cases that have a large difference between the time0 predictions and time24 predictions.


We'll look at: the complete `1_4_cohort` here

- pat services
- pat lv of care
- patient trajectories from admission until 24hrs

Subgroups of the entire cohort will be analyzed in another notebook

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [2]:
# view data frame

# adjust this to hide ID columns for posting to github
forrepo=False

def view_df(df):
    hidecols = []
    if forrepo:
        hidecols=['anon_id', 'pat_enc_csn_id_coded', 'inpatient_data_id_coded'] # these are hidden columns
        
    return(df.drop(hidecols, axis=1, errors='ignore').head())

# Data

Load in the data

In [3]:
# read in data files
datadir = "../../DataTD/"
resultsdir = "../../OutputTD/3_models/1_4_cohort/"

savedir = "../../OutputTD/5_results_analysis/"

adt_file = datadir + "cohort_1_3_adt.csv"
results_file = resultsdir + "1_4_cohort_test_results.csv"
cohort_file = "../../OutputTD/1_cohort/1_4_cohort.csv"


full_adt = pd.read_csv(adt_file)
results = pd.read_csv(results_file)
cohort = pd.read_csv(cohort_file)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [4]:
view_df(results)

cohort.pat_enc_csn_id_coded.nunique()

43980

# Bring in the ADT Table

We can use the ADT table to look at 

- pat_class
- pat_lv_of_care

In [5]:
# filter adt table down to test cohort
adt = full_adt[full_adt.pat_enc_csn_id_coded.isin(cohort.pat_enc_csn_id_coded)]

# these should match - it does!
print(adt.pat_enc_csn_id_coded.nunique())
print(adt.pat_enc_csn_id_coded.nunique())

view_df(adt)

43980
43980


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service
0,JCcd7ba7,131277369526,2019-10-31 10:46:00 UTC,4,Inpatient,1.0,66.0,Neonatal ICU - VC Only,Patient Update,Emergency Medicine
1,JCdc0a60,131281229519,2020-01-17 22:12:00 UTC,3,Inpatient,1.0,68.0,Newborn Nursery - VC Only,Patient Update,Emergency Medicine
2,JC2a0efc1,131280575648,2019-12-06 07:22:00 UTC,6,Inpatient,1.0,68.0,Newborn Nursery - VC Only,Patient Update,Emergency
3,JCdc5d49,131240161198,2017-10-08 06:59:00 UTC,2,Emergency Services,,,,Census,Emergency
4,JCe56297,131079845274,2015-04-02 06:59:00 UTC,2,Emergency Services,,,,Census,Emergency


In [6]:
# add the admit time column from the prediction results csv to the adt csv
adt_admit = adt.merge(cohort[['pat_enc_csn_id_coded', 'admit_time']], how='left', on=['pat_enc_csn_id_coded'])


adt_admit.columns
view_df(adt_admit[['pat_enc_csn_id_coded', 'effective_time_jittered_utc', 
                 'pat_class', 'admit_time']])

Unnamed: 0,pat_enc_csn_id_coded,effective_time_jittered_utc,pat_class,admit_time
0,131277369526,2019-10-31 10:46:00 UTC,Inpatient,2019-10-31 10:46:00+00:00
1,131281229519,2020-01-17 22:12:00 UTC,Inpatient,2020-01-17 22:12:00+00:00
2,131280575648,2019-12-06 07:22:00 UTC,Inpatient,2019-12-06 07:22:00+00:00
3,131240161198,2017-10-08 06:59:00 UTC,Emergency Services,2017-10-08 07:59:00+00:00
4,131079845274,2015-04-02 06:59:00 UTC,Emergency Services,2015-04-02 11:07:00+00:00


In [7]:
# mark the events with some labels that make things easier later
adt_admit = adt_admit.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc'])

# **assumes dataframe is sorted by time

# mark whether previous entry had pat class Emergency Services
adt_admit['prev_emerg'] = adt_admit.pat_class.shift() == 'Emergency Services'
# mark whether current event has pat class Inpatient
adt_admit['curr_inpatient'] = adt_admit.pat_class == 'Inpatient'
# mark whether current event is continued csn as previous
adt_admit['continued'] = adt_admit.pat_enc_csn_id_coded.eq(
    adt_admit.pat_enc_csn_id_coded.shift())

# find our cases by taking the AND of these columns
cols = ['prev_emerg', 'curr_inpatient', 'continued']
adt_admit['first_ip'] = adt_admit[cols].all(axis=1)

view_df(adt_admit)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,admit_time,prev_emerg,curr_inpatient,continued,first_ip
19178,JCe78a06,131062667066,2015-01-01 17:10:00 UTC,1,Emergency Services,3.0,,,Admission,Emergency,2015-01-02 01:01:00+00:00,False,False,False,False
19438,JCe78a06,131062667066,2015-01-02 00:32:00 UTC,2,Emergency Services,,,,Transfer Out,Emergency,2015-01-02 01:01:00+00:00,True,False,True,False
19096,JCe78a06,131062667066,2015-01-02 00:32:00 UTC,3,Emergency Services,,,,Transfer In,Emergency,2015-01-02 01:01:00+00:00,True,False,True,False
19434,JCe78a06,131062667066,2015-01-02 00:49:00 UTC,4,Emergency Services,,,,Transfer Out,Emergency,2015-01-02 01:01:00+00:00,True,False,True,False
19100,JCe78a06,131062667066,2015-01-02 00:49:00 UTC,5,Emergency Services,,,,Transfer In,Emergency,2015-01-02 01:01:00+00:00,True,False,True,False


# Look at first inpatient pat services

In [8]:
first_ips = adt_admit[adt_admit.first_ip == True]

print(first_ips.shape)
first_ips.pat_enc_csn_id_coded.nunique()

(44021, 15)


43980

In [9]:
# find first inpatient pat service for each csn
first_ip_pat_service = first_ips[['pat_enc_csn_id_coded', 'pat_service']].drop_duplicates()

print(first_ip_pat_service.shape)

(43989, 2)


# multiple emergency-->inpatient changes

Some CSNs have multiple emergency-->inpatient changes. This means they went from emergency-->inpatient-->emergency-->inpatient.

Take a look at some of these. The first change from emergency-->inpatient would be the admit time. Keep this occurrence. This only happen for 10 CSNs.

In [10]:
counts = first_ip_pat_service.groupby('pat_enc_csn_id_coded').count().sort_values('pat_service')

multi_csns = counts[counts.pat_service > 1]

print(multi_csns.shape)
multi_csns

(9, 1)


Unnamed: 0_level_0,pat_service
pat_enc_csn_id_coded,Unnamed: 1_level_1
131176788268,2
131171253065,2
131238854978,2
131244958373,2
131230527582,2
131201950621,2
131189499371,2
131230902129,2
131164129413,2


In [11]:
adt_admit[adt_admit.pat_enc_csn_id_coded == 131238854978].sort_values('seq_num_in_enc')[
    ['anon_id', 'pat_enc_csn_id_coded', 'effective_time_jittered_utc', 'admit_time',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service'
       ]]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,admit_time,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service
4519,JCdb9b61,131238854978,2017-10-02 05:25:00 UTC,2017-10-02 07:56:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency
5006,JCdb9b61,131238854978,2017-10-02 06:59:00 UTC,2017-10-02 07:56:00+00:00,2,Emergency Services,,,,Census,Emergency
4581,JCdb9b61,131238854978,2017-10-02 07:45:00 UTC,2017-10-02 07:56:00+00:00,3,Emergency Services,,,,Transfer Out,Emergency
5027,JCdb9b61,131238854978,2017-10-02 07:45:00 UTC,2017-10-02 07:56:00+00:00,4,Emergency Services,,,,Transfer In,Emergency
307621,JCdb9b61,131238854978,2017-10-02 07:56:00 UTC,2017-10-02 07:56:00+00:00,5,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Patient Update,Emergency
305993,JCdb9b61,131238854978,2017-10-02 07:57:00 UTC,2017-10-02 07:56:00+00:00,6,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency
310885,JCdb9b61,131238854978,2017-10-02 07:57:00 UTC,2017-10-02 07:56:00+00:00,7,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Emergency
311000,JCdb9b61,131238854978,2017-10-02 11:33:00 UTC,2017-10-02 07:56:00+00:00,8,Emergency Services,,5.0,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency
307383,JCdb9b61,131238854978,2017-10-02 11:33:00 UTC,2017-10-02 07:56:00+00:00,9,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,General Surgery
305544,JCdb9b61,131238854978,2017-10-03 01:29:00 UTC,2017-10-02 07:56:00+00:00,10,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),Transfer Out,General Surgery


In [12]:
# keep the first occurrence of inpatient event
first_ips_keep = first_ips.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').first().reset_index()

# now we have one event per person
print(first_ips_keep.pat_enc_csn_id_coded.nunique())
print(first_ips_keep.shape)

43980
(43980, 15)


In [13]:
view_df(first_ips_keep)

Unnamed: 0,pat_enc_csn_id_coded,anon_id,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,admit_time,prev_emerg,curr_inpatient,continued,first_ip
0,131062667066,JCe78a06,2015-01-02 01:01:00 UTC,7,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (PAMF),2015-01-02 01:01:00+00:00,True,True,True,True
1,131062745090,JCd1c19e,2015-01-03 05:53:00 UTC,9,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery,2015-01-03 05:53:00+00:00,True,True,True,True
2,131062747648,JCd91eb2,2015-01-01 08:24:00 UTC,8,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,Cardiology,2015-01-01 08:24:00+00:00,True,True,True,True
3,131062788358,JCe7cb4d,2015-01-01 23:39:00 UTC,3,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Psychiatry,2015-01-01 23:39:00+00:00,True,True,True,True
4,131063044001,JCe293de,2015-01-05 02:23:00 UTC,7,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Neurology,2015-01-05 02:23:00+00:00,True,True,True,True


In [14]:
first_ips_keep.columns

first_ips_save = first_ips_keep[['pat_enc_csn_id_coded', 'anon_id', 'admit_time',
                                 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', ]]

view_df(first_ips_save)

Unnamed: 0,pat_enc_csn_id_coded,anon_id,admit_time,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service
0,131062667066,JCe78a06,2015-01-02 01:01:00+00:00,2015-01-02 01:01:00 UTC,7,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (PAMF)
1,131062745090,JCd1c19e,2015-01-03 05:53:00+00:00,2015-01-03 05:53:00 UTC,9,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery
2,131062747648,JCd91eb2,2015-01-01 08:24:00+00:00,2015-01-01 08:24:00 UTC,8,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,Cardiology
3,131062788358,JCe7cb4d,2015-01-01 23:39:00+00:00,2015-01-01 23:39:00 UTC,3,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Psychiatry
4,131063044001,JCe293de,2015-01-05 02:23:00+00:00,2015-01-05 02:23:00 UTC,7,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Neurology


In [15]:
# save this file

savefile = savedir + "02_first_inpatient_event.csv"
first_ips_save.to_csv(savefile)

## Look at last Emergeny Services Pat Service

In [16]:
# mark the events with some labels that make things easier later

# **assumes dataframe is sorted by time

# mark whether next entry is inpatient
adt_admit['next_ip'] = adt_admit.pat_class.shift(-1) == 'Inpatient'
# mark whether current event has pat class Inpatient
adt_admit['curr_emerg'] = adt_admit.pat_class == 'Emergency Services'
# mark whether current event is continued csn as previous
adt_admit['not_continued'] = ~adt_admit.pat_enc_csn_id_coded.eq(
    adt_admit.pat_enc_csn_id_coded.shift(-1))

# find our cases by taking the AND of these columns
cols = ['next_ip', 'curr_emerg', 'continued']
adt_admit['last_emerg'] = adt_admit.curr_emerg & (adt_admit.next_ip | adt_admit.not_continued) 

adt_admit[['pat_enc_csn_id_coded', 'pat_class', 'last_emerg'] + cols].head(20)

Unnamed: 0,pat_enc_csn_id_coded,pat_class,last_emerg,next_ip,curr_emerg,continued
19178,131062667066,Emergency Services,False,False,True,False
19438,131062667066,Emergency Services,False,False,True,True
19096,131062667066,Emergency Services,False,False,True,True
19434,131062667066,Emergency Services,False,False,True,True
19100,131062667066,Emergency Services,False,False,True,True
18804,131062667066,Emergency Services,True,True,True,True
217107,131062667066,Inpatient,False,True,False,True
217366,131062667066,Inpatient,False,True,False,True
217842,131062667066,Inpatient,False,True,False,True
217409,131062667066,Inpatient,False,True,False,True


In [17]:
last_emergs = adt_admit[adt_admit.last_emerg == True]

# take the first one for each csn
last_emergs_keep = last_emergs.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').first().reset_index()

# now we have one event per person
print(last_emergs_keep.pat_enc_csn_id_coded.nunique())
print(last_emergs_keep.shape)

view_df(last_emergs)

43980
(43980, 19)


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,admit_time,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg
18804,JCe78a06,131062667066,2015-01-02 01:01:00 UTC,6,Emergency Services,,,,Transfer Out,Emergency,2015-01-02 01:01:00+00:00,True,False,True,False,True,True,False,True
1208,JCd1c19e,131062745090,2015-01-03 05:53:00 UTC,8,Emergency Services,,,,Transfer Out,Emergency,2015-01-03 05:53:00+00:00,True,False,True,False,True,True,False,True
77067,JCd91eb2,131062747648,2015-01-01 08:24:00 UTC,7,Emergency Services,,,,Transfer Out,Emergency,2015-01-01 08:24:00+00:00,True,False,True,False,True,True,False,True
5398,JCe7cb4d,131062788358,2015-01-01 23:39:00 UTC,2,Emergency Services,,,,Transfer Out,Emergency,2015-01-01 23:39:00+00:00,True,False,True,False,True,True,False,True
109842,JCe293de,131063044001,2015-01-05 02:23:00 UTC,6,Emergency Services,,,,Transfer Out,Emergency,2015-01-05 02:23:00+00:00,True,False,True,False,True,True,False,True


In [18]:
last_emergs_keep.columns

last_emerg_save = last_emergs_keep[['pat_enc_csn_id_coded', 'anon_id', 'admit_time',
                                 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', ]]

view_df(last_emerg_save)

Unnamed: 0,pat_enc_csn_id_coded,anon_id,admit_time,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service
0,131062667066,JCe78a06,2015-01-02 01:01:00+00:00,2015-01-02 01:01:00 UTC,6,Emergency Services,,,,Transfer Out,Emergency
1,131062745090,JCd1c19e,2015-01-03 05:53:00+00:00,2015-01-03 05:53:00 UTC,8,Emergency Services,,,,Transfer Out,Emergency
2,131062747648,JCd91eb2,2015-01-01 08:24:00+00:00,2015-01-01 08:24:00 UTC,7,Emergency Services,,,,Transfer Out,Emergency
3,131062788358,JCe7cb4d,2015-01-01 23:39:00+00:00,2015-01-01 23:39:00 UTC,2,Emergency Services,,,,Transfer Out,Emergency
4,131063044001,JCe293de,2015-01-05 02:23:00+00:00,2015-01-05 02:23:00 UTC,6,Emergency Services,,,,Transfer Out,Emergency


In [19]:
# save this file

savefile = savedir + "02_last_emerg_event.csv"
last_emerg_save.to_csv(savefile)

# Pat lv of care

Look at pat lv of care when patients are admitted

In [20]:
adt_admit.columns

Index(['anon_id', 'pat_enc_csn_id_coded', 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', 'admit_time',
       'prev_emerg', 'curr_inpatient', 'continued', 'first_ip', 'next_ip',
       'curr_emerg', 'not_continued', 'last_emerg'],
      dtype='object')

In [21]:
# change admit time and effective time to datetime - these take a littl while to run!!

# change the effective time to datetime since read in from csv
adt_admit.effective_time_jittered_utc = pd.to_datetime(adt_admit.effective_time_jittered_utc)

# change admit time
adt_admit.admit_time = pd.to_datetime(adt_admit.admit_time, utc=True)

In [22]:
# filter down to 24hrs after admit
adt_admit['time_since_admit'] = adt_admit.effective_time_jittered_utc - adt_admit.admit_time

adt_results_24hr = adt_admit[(adt_admit.time_since_admit <= timedelta(hours=24)) &
                             (adt_admit.time_since_admit >= timedelta(hours=0))
                            ]

view_df(adt_results_24hr)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,admit_time,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg,time_since_admit
18804,JCe78a06,131062667066,2015-01-02 01:01:00+00:00,6,Emergency Services,,,,Transfer Out,Emergency,2015-01-02 01:01:00+00:00,True,False,True,False,True,True,False,True,0 days 00:00:00
217107,JCe78a06,131062667066,2015-01-02 01:01:00+00:00,7,Inpatient,1.0,6.0,Intermediate Care - With Cardiac Monitor,Transfer In,General Medicine (PAMF),2015-01-02 01:01:00+00:00,True,True,True,True,True,False,False,False,0 days 00:00:00
217366,JCe78a06,131062667066,2015-01-02 07:59:00+00:00,8,Inpatient,,6.0,Intermediate Care - With Cardiac Monitor,Census,General Medicine (PAMF),2015-01-02 01:01:00+00:00,False,True,True,False,True,False,False,False,0 days 06:58:00
1208,JCd1c19e,131062745090,2015-01-03 05:53:00+00:00,8,Emergency Services,,,,Transfer Out,Emergency,2015-01-03 05:53:00+00:00,True,False,True,False,True,True,False,True,0 days 00:00:00
294639,JCd1c19e,131062745090,2015-01-03 05:53:00+00:00,9,Inpatient,1.0,5.0,Acute Care (Assessment or intervention q4-8),Transfer In,Orthopaedic Surgery,2015-01-03 05:53:00+00:00,True,True,True,True,True,False,False,False,0 days 00:00:00


In [23]:
adt_admit[adt_admit.pat_enc_csn_id_coded == 131283158395][['pat_enc_csn_id_coded', 'effective_time_jittered_utc',
                                                          'seq_num_in_enc', 'pat_class', 'pat_lv_of_care',
                                                          'event_type', 'pat_service', 'admit_time',
                                                          'time_since_admit']]

Unnamed: 0,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,pat_lv_of_care,event_type,pat_service,admit_time,time_since_admit
68230,131283158395,2020-01-30 11:27:00+00:00,1,Emergency Services,,Admission,Emergency,2020-01-30 15:35:00+00:00,-1 days +19:52:00
68414,131283158395,2020-01-30 11:35:00+00:00,2,Emergency Services,,Transfer Out,Emergency,2020-01-30 15:35:00+00:00,-1 days +20:00:00
68431,131283158395,2020-01-30 11:35:00+00:00,3,Emergency Services,,Transfer In,Emergency,2020-01-30 15:35:00+00:00,-1 days +20:00:00
67889,131283158395,2020-01-30 12:46:00+00:00,4,Emergency Services,,Patient Update,Emergency Medicine,2020-01-30 15:35:00+00:00,-1 days +21:11:00
551224,131283158395,2020-01-30 15:35:00+00:00,5,Inpatient,Acute Care (Assessment or intervention q4-8),Patient Update,Emergency Medicine,2020-01-30 15:35:00+00:00,0 days 00:00:00
553570,131283158395,2020-01-30 17:32:00+00:00,6,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency Medicine,2020-01-30 15:35:00+00:00,0 days 01:57:00
552451,131283158395,2020-01-30 17:32:00+00:00,7,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer In,Emergency Medicine,2020-01-30 15:35:00+00:00,0 days 01:57:00
554591,131283158395,2020-01-30 17:39:00+00:00,8,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer Out,Emergency Medicine,2020-01-30 15:35:00+00:00,0 days 02:04:00
551260,131283158395,2020-01-30 17:39:00+00:00,9,Inpatient,Acute Care (Assessment or intervention q4-8),Transfer In,Medicine,2020-01-30 15:35:00+00:00,0 days 02:04:00
552625,131283158395,2020-01-31 07:59:00+00:00,10,Inpatient,Acute Care (Assessment or intervention q4-8),Census,Medicine,2020-01-30 15:35:00+00:00,0 days 16:24:00


# Adding some additional columns for length of stay in ED

In [24]:
# add a couple more columns

# get time of first emergency services event
first_ED = adt_admit[(adt_admit.pat_class == 'Emergency Services')].sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').first().reset_index()

print(adt_admit.pat_enc_csn_id_coded.nunique())
print(first_ED.pat_enc_csn_id_coded.nunique())

set(adt_admit.pat_enc_csn_id_coded) - set(first_ED.pat_enc_csn_id_coded)

print("\n", first_ED.pat_service.value_counts())
print("\n", first_ED.event_type.value_counts())

first_ED['first_ED_time'] = first_ED.effective_time_jittered_utc

first_ED_time = first_ED[['pat_enc_csn_id_coded', 'first_ED_time']].drop_duplicates()
first_ED_time.head()

adt_ed = adt_admit.merge(first_ED_time, how='left')
view_df(adt_ed)

43980
43980

 Emergency                        43971
Emergency Medicine                   2
Neurosurgery                         2
General Medicine (PAMF)              1
Gynecology                           1
Hepatology                           1
Psychiatry                           1
General Medicine (University)        1
Name: pat_service, dtype: int64

 Admission         43979
Patient Update        1
Name: event_type, dtype: int64


Unnamed: 0,anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,...,prev_emerg,curr_inpatient,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg,time_since_admit,first_ED_time
0,JCe78a06,131062667066,2015-01-01 17:10:00+00:00,1,Emergency Services,3.0,,,Admission,Emergency,...,False,False,False,False,False,True,False,False,-1 days +16:09:00,2015-01-01 17:10:00+00:00
1,JCe78a06,131062667066,2015-01-02 00:32:00+00:00,2,Emergency Services,,,,Transfer Out,Emergency,...,True,False,True,False,False,True,False,False,-1 days +23:31:00,2015-01-01 17:10:00+00:00
2,JCe78a06,131062667066,2015-01-02 00:32:00+00:00,3,Emergency Services,,,,Transfer In,Emergency,...,True,False,True,False,False,True,False,False,-1 days +23:31:00,2015-01-01 17:10:00+00:00
3,JCe78a06,131062667066,2015-01-02 00:49:00+00:00,4,Emergency Services,,,,Transfer Out,Emergency,...,True,False,True,False,False,True,False,False,-1 days +23:48:00,2015-01-01 17:10:00+00:00
4,JCe78a06,131062667066,2015-01-02 00:49:00+00:00,5,Emergency Services,,,,Transfer In,Emergency,...,True,False,True,False,False,True,False,False,-1 days +23:48:00,2015-01-01 17:10:00+00:00


In [25]:
# add length from ED entry until discharge

# get time of discharge
# last_event = adt_ed.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc']).groupby('pat_enc_csn_id_coded').last().reset_index()

# print("\n", last_event.pat_service.value_counts())
# print("\n", last_event.event_type.value_counts())

# last_event[last_event.event_type == 'Census']

# let's use the actual discharge event instead
discharge_event = adt_ed[adt_ed.event_type == 'Discharge']

# make sure we keep all csns
print(adt_admit.pat_enc_csn_id_coded.nunique())
print(discharge_event.pat_enc_csn_id_coded.nunique())

print("\n", discharge_event.pat_service.value_counts())
print("\n", discharge_event.event_type.value_counts())

# not everyone has a Discharge event
missing_discharge = set(adt_admit.pat_enc_csn_id_coded) - set(discharge_event.pat_enc_csn_id_coded)
len(missing_discharge)

missing_discharge

discharge_event['discharge_time'] = discharge_event.effective_time_jittered_utc
discharge_event['length_of_ip_since_admit'] = discharge_event.time_since_admit
discharge_event['length_from_ED_entry_until_discharge'] = discharge_event.discharge_time - discharge_event.first_ED_time

discharge_event.columns


43980
43765

 Medicine                         7838
General Medicine (University)    7463
General Surgery                  3262
General Medicine (PAMF)          2938
Cardiology                       2920
                                 ... 
Pediatric Surgery                   1
Hospice                             1
Treatment/Procedure                 1
Occupational Therapy                1
Bronchoscopy                        1
Name: pat_service, Length: 75, dtype: int64

 Discharge    43765
Name: event_type, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  discharge_event['discharge_time'] = discharge_event.effective_time_jittered_utc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  discharge_event['length_of_ip_since_admit'] = discharge_event.time_since_admit
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  discharge_event['length_from_ED_entry_until_di

Index(['anon_id', 'pat_enc_csn_id_coded', 'effective_time_jittered_utc',
       'seq_num_in_enc', 'pat_class', 'base_pat_class_c', 'pat_lvl_of_care_c',
       'pat_lv_of_care', 'event_type', 'pat_service', 'admit_time',
       'prev_emerg', 'curr_inpatient', 'continued', 'first_ip', 'next_ip',
       'curr_emerg', 'not_continued', 'last_emerg', 'time_since_admit',
       'first_ED_time', 'discharge_time', 'length_of_ip_since_admit',
       'length_from_ED_entry_until_discharge'],
      dtype='object')

In [26]:
# add the csns that were missing a discharge event
cns_cols = adt_ed[['anon_id', 'pat_enc_csn_id_coded', 'admit_time',
       'first_ED_time']].drop_duplicates()

all_csns = cns_cols.merge(discharge_event, how='left')
view_df(all_csns)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,first_ED_time,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,...,continued,first_ip,next_ip,curr_emerg,not_continued,last_emerg,time_since_admit,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge
0,JCe78a06,131062667066,2015-01-02 01:01:00+00:00,2015-01-01 17:10:00+00:00,2015-01-11 23:40:00+00:00,20.0,Inpatient,,6.0,Intermediate Care - With Cardiac Monitor,...,True,False,False,False,True,False,9 days 22:39:00,2015-01-11 23:40:00+00:00,9 days 22:39:00,10 days 06:30:00
1,JCd1c19e,131062745090,2015-01-03 05:53:00+00:00,2015-01-03 01:56:00+00:00,2015-01-07 02:00:00+00:00,25.0,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),...,True,False,False,False,True,False,3 days 20:07:00,2015-01-07 02:00:00+00:00,3 days 20:07:00,4 days 00:04:00
2,JCd91eb2,131062747648,2015-01-01 08:24:00+00:00,2015-01-01 01:10:00+00:00,2015-01-06 03:19:00+00:00,15.0,Inpatient,,6.0,Intermediate Care - With Cardiac Monitor,...,True,False,False,False,True,False,4 days 18:55:00,2015-01-06 03:19:00+00:00,4 days 18:55:00,5 days 02:09:00
3,JCe7cb4d,131062788358,2015-01-01 23:39:00+00:00,2015-01-01 17:07:00+00:00,2015-01-22 01:25:00+00:00,39.0,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),...,True,False,False,False,True,False,20 days 01:46:00,2015-01-22 01:25:00+00:00,20 days 01:46:00,20 days 08:18:00
4,JCe293de,131063044001,2015-01-05 02:23:00+00:00,2015-01-04 18:13:00+00:00,2015-01-05 22:30:00+00:00,9.0,Inpatient,,5.0,Acute Care (Assessment or intervention q4-8),...,True,False,False,False,True,False,0 days 20:07:00,2015-01-05 22:30:00+00:00,0 days 20:07:00,1 days 04:17:00


In [27]:
all_csns.pat_enc_csn_id_coded.nunique()

43980

In [28]:
# keep just columns that we care about
length_of_stay_labels = all_csns[[
    'anon_id', 'pat_enc_csn_id_coded', 'admit_time',
       'first_ED_time', 'discharge_time', 'length_of_ip_since_admit',
       'length_from_ED_entry_until_discharge']].drop_duplicates()
view_df(length_of_stay_labels)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,first_ED_time,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge
0,JCe78a06,131062667066,2015-01-02 01:01:00+00:00,2015-01-01 17:10:00+00:00,2015-01-11 23:40:00+00:00,9 days 22:39:00,10 days 06:30:00
1,JCd1c19e,131062745090,2015-01-03 05:53:00+00:00,2015-01-03 01:56:00+00:00,2015-01-07 02:00:00+00:00,3 days 20:07:00,4 days 00:04:00
2,JCd91eb2,131062747648,2015-01-01 08:24:00+00:00,2015-01-01 01:10:00+00:00,2015-01-06 03:19:00+00:00,4 days 18:55:00,5 days 02:09:00
3,JCe7cb4d,131062788358,2015-01-01 23:39:00+00:00,2015-01-01 17:07:00+00:00,2015-01-22 01:25:00+00:00,20 days 01:46:00,20 days 08:18:00
4,JCe293de,131063044001,2015-01-05 02:23:00+00:00,2015-01-04 18:13:00+00:00,2015-01-05 22:30:00+00:00,0 days 20:07:00,1 days 04:17:00


In [56]:
# add death during same visit column

# join the death dates, NaN means they didn't die
deaths19 = pd.read_csv("{}/cohort_1_3_demo_deaths19.csv".format(datadir))
deaths19.head()

length_of_stay_death = length_of_stay_labels.merge(deaths19, how='left')
length_of_stay_death.death_date_jittered = pd.to_datetime(length_of_stay_death.death_date_jittered, utc=True)
length_of_stay_death['death_datetime'] = length_of_stay_death.death_date_jittered
length_of_stay_death.death_date_jittered = length_of_stay_death.death_date_jittered.dt.date
length_of_stay_death.head()

# find events where death date occur before discharge time
length_of_stay_death['discharge_date'] = length_of_stay_death.discharge_time.dt.date
died_during_stay = length_of_stay_death[length_of_stay_death.death_date_jittered < length_of_stay_death.discharge_date]
print(died_during_stay.pat_enc_csn_id_coded.nunique())
died_during_stay.head()

died_during_stay['died_before_discharge'] = True
died_during_stay.head()

# if died before dishcarge, make end date == death date
died_during_stay['end_date'] = died_during_stay.death_date_jittered


full_labels = length_of_stay_death.merge(died_during_stay, how='left')


120


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  died_during_stay['died_before_discharge'] = True
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  died_during_stay['end_date'] = died_during_stay.death_date_jittered


In [59]:
view_df(full_labels)
# view_df(full_labels[full_labels.died_before_discharge == True])

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,first_ED_time,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge,death_date_jittered,death_datetime,discharge_date,died_before_discharge,end_date
0,JCe78a06,131062667066,2015-01-02 01:01:00+00:00,2015-01-01 17:10:00+00:00,2015-01-11 23:40:00+00:00,9 days 22:39:00,10 days 06:30:00,2020-01-10,2020-01-10 00:00:00+00:00,2015-01-11,,
1,JCd1c19e,131062745090,2015-01-03 05:53:00+00:00,2015-01-03 01:56:00+00:00,2015-01-07 02:00:00+00:00,3 days 20:07:00,4 days 00:04:00,NaT,NaT,2015-01-07,,
2,JCd91eb2,131062747648,2015-01-01 08:24:00+00:00,2015-01-01 01:10:00+00:00,2015-01-06 03:19:00+00:00,4 days 18:55:00,5 days 02:09:00,2019-01-01,2019-01-01 00:00:00+00:00,2015-01-06,,
3,JCe7cb4d,131062788358,2015-01-01 23:39:00+00:00,2015-01-01 17:07:00+00:00,2015-01-22 01:25:00+00:00,20 days 01:46:00,20 days 08:18:00,NaT,NaT,2015-01-22,,
4,JCe293de,131063044001,2015-01-05 02:23:00+00:00,2015-01-04 18:13:00+00:00,2015-01-05 22:30:00+00:00,0 days 20:07:00,1 days 04:17:00,NaT,NaT,2015-01-05,,


In [62]:
def set_end_date(row):
    # if discharge == NULL, death == NULL, then end_date = NULL
    if (pd.isnull(row.death_date_jittered) & pd.isnull(row.discharge_date)):
        return(None)
    # if death == NULL, then end_date = discharge date
    if (pd.isnull(row.death_date_jittered) & ~pd.isnull(row.discharge_date)):
        return(row.discharge_date)
    # if discharge == NULL, then end_date = death_date
    if (~pd.isnull(row.death_date_jittered) & pd.isnull(row.discharge_date)):
        return(row.death_date_jittered)
    # if discharge and death dates exist, take earlier
    if (~pd.isnull(row.death_date_jittered) & ~pd.isnull(row.discharge_date)):
        if (pd.isnull(row.died_before_discharge)):
            return(row.discharge_date)
        else:
            return(row.death_date_jittered)
    # should never reach here
    return(None)
    
    
    

full_labels['end_date'] = full_labels.apply(lambda row: set_end_date(row), axis=1)

In [72]:
# check each case
# view_df(full_labels[pd.isnull(full_labels.death_date_jittered) & pd.isnull(full_labels.discharge_date)])

# view_df(full_labels[pd.isnull(full_labels.death_date_jittered) & (~pd.isnull(full_labels.discharge_date))])

# view_df(full_labels[~pd.isnull(full_labels.death_date_jittered) & pd.isnull(full_labels.discharge_date)])

# view_df(full_labels[~pd.isnull(full_labels.death_date_jittered) & 
#         ~pd.isnull(full_labels.discharge_date) & 
#         pd.isnull(full_labels.died_before_discharge)])

view_df(full_labels[~pd.isnull(full_labels.death_date_jittered) & 
        ~pd.isnull(full_labels.discharge_date) & 
        ~pd.isnull(full_labels.died_before_discharge)])

# each case looks good, so continue

Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,first_ED_time,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge,death_date_jittered,death_datetime,discharge_date,died_before_discharge,end_date
31284,JCea29f5,131261336517,2019-01-12 23:16:00+00:00,2019-01-12 20:09:00+00:00,2019-01-15 05:20:00+00:00,2 days 06:04:00,2 days 09:11:00,2019-01-14,2019-01-14 00:00:00+00:00,2019-01-15,True,2019-01-14
31307,JCdce698,131261368453,2018-12-24 17:53:00+00:00,2018-12-24 15:49:00+00:00,2019-01-06 03:27:00+00:00,12 days 09:34:00,12 days 11:38:00,2019-01-05,2019-01-05 00:00:00+00:00,2019-01-06,True,2019-01-05
31315,JCd1ad47,131261390042,2019-01-13 22:34:00+00:00,2019-01-13 21:05:00+00:00,2019-02-04 05:59:00+00:00,21 days 07:25:00,21 days 08:54:00,2019-02-03,2019-02-03 00:00:00+00:00,2019-02-04,True,2019-02-03
31434,JCcd32e7,131261491577,2019-01-07 08:42:00+00:00,2019-01-07 03:57:00+00:00,2019-01-15 07:03:00+00:00,7 days 22:21:00,8 days 03:06:00,2019-01-14,2019-01-14 00:00:00+00:00,2019-01-15,True,2019-01-14
31492,JCe882a5,131261583084,2019-01-15 05:10:00+00:00,2019-01-15 01:03:00+00:00,2019-01-22 00:30:00+00:00,6 days 19:20:00,6 days 23:27:00,2019-01-21,2019-01-21 00:00:00+00:00,2019-01-22,True,2019-01-21


In [76]:
# create difference columns with dates instead

full_labels['length_of_ip_since_admit'] = full_labels.end_date - full_labels.admit_time.dt.date
full_labels['length_from_ED_entry_until_end_date'] = full_labels.end_date - full_labels.first_ED_time.dt.date

print(full_labels.columns)
full_labels

Index(['anon_id', 'pat_enc_csn_id_coded', 'admit_time', 'first_ED_time',
       'discharge_time', 'length_of_ip_since_admit',
       'length_from_ED_entry_until_discharge', 'death_date_jittered',
       'death_datetime', 'discharge_date', 'died_before_discharge', 'end_date',
       'length_from_ED_entry_until_end_date'],
      dtype='object')


Unnamed: 0,anon_id,pat_enc_csn_id_coded,admit_time,first_ED_time,discharge_time,length_of_ip_since_admit,length_from_ED_entry_until_discharge,death_date_jittered,death_datetime,discharge_date,died_before_discharge,end_date,length_from_ED_entry_until_end_date
0,JCe78a06,131062667066,2015-01-02 01:01:00+00:00,2015-01-01 17:10:00+00:00,2015-01-11 23:40:00+00:00,9 days,10 days 06:30:00,2020-01-10,2020-01-10 00:00:00+00:00,2015-01-11,,2015-01-11,10 days
1,JCd1c19e,131062745090,2015-01-03 05:53:00+00:00,2015-01-03 01:56:00+00:00,2015-01-07 02:00:00+00:00,4 days,4 days 00:04:00,NaT,NaT,2015-01-07,,2015-01-07,4 days
2,JCd91eb2,131062747648,2015-01-01 08:24:00+00:00,2015-01-01 01:10:00+00:00,2015-01-06 03:19:00+00:00,5 days,5 days 02:09:00,2019-01-01,2019-01-01 00:00:00+00:00,2015-01-06,,2015-01-06,5 days
3,JCe7cb4d,131062788358,2015-01-01 23:39:00+00:00,2015-01-01 17:07:00+00:00,2015-01-22 01:25:00+00:00,21 days,20 days 08:18:00,NaT,NaT,2015-01-22,,2015-01-22,21 days
4,JCe293de,131063044001,2015-01-05 02:23:00+00:00,2015-01-04 18:13:00+00:00,2015-01-05 22:30:00+00:00,0 days,1 days 04:17:00,NaT,NaT,2015-01-05,,2015-01-05,1 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43975,JC2a05c45,131287787282,2020-03-23 07:52:00+00:00,2020-03-23 06:02:00+00:00,2020-03-29 21:22:00+00:00,6 days,6 days 15:20:00,NaT,NaT,2020-03-29,,2020-03-29,6 days
43976,JCd84eef,131287789243,2020-03-26 08:19:00+00:00,2020-03-26 05:10:00+00:00,2020-03-29 00:10:00+00:00,3 days,2 days 19:00:00,NaT,NaT,2020-03-29,,2020-03-29,3 days
43977,JCe4ee78,131287806073,2020-03-25 21:49:00+00:00,2020-03-25 17:48:00+00:00,2020-03-29 23:16:00+00:00,4 days,4 days 05:28:00,NaT,NaT,2020-03-29,,2020-03-29,4 days
43978,JCddbfd4,131287838760,2020-03-28 07:06:00+00:00,2020-03-28 04:51:00+00:00,2020-03-29 23:10:00+00:00,1 days,1 days 18:19:00,NaT,NaT,2020-03-29,,2020-03-29,1 days


In [78]:
# save the final labels
keep_cols = ['anon_id', 'pat_enc_csn_id_coded', 'admit_time', 'first_ED_time',
       'discharge_time', 'end_date',
        'length_of_ip_since_admit', 'length_from_ED_entry_until_end_date',
       'death_date_jittered', 'discharge_date', 'died_before_discharge', 
       ]

full_labels[keep_cols].to_csv(savedir+"5_2_length_of_stay_labels.csv", index=False)

In [None]:
full_labels[full_labels.pat_enc_csn_id_coded == 131284409583]

In [None]:
# find difference

In [None]:
full_labels.pat_enc_csn_id_coded.nunique()

# Back to pat lv of care labels

In [None]:
# filter to inpatient events after admit time
ip_events = adt_results_24hr[(adt_results_24hr.admit_time <= adt_results_24hr.effective_time_jittered_utc) &
                     (adt_results_24hr.pat_class == 'Inpatient')]

ip_events = ip_events[ip_events.pat_enc_csn_id_coded.isin(cohort.pat_enc_csn_id_coded)]

ip_events.pat_enc_csn_id_coded.nunique()

In [None]:
# keep only change of lv of care status

# sort by sequence 
ip_events = ip_events.sort_values(['pat_enc_csn_id_coded', 'seq_num_in_enc'])

# keep only those that change
change_care = ip_events[~(ip_events.pat_lv_of_care.eq(ip_events.pat_lv_of_care.shift()) &
                         ip_events.pat_enc_csn_id_coded.eq(ip_events.pat_enc_csn_id_coded.shift()))]

change_care.pat_enc_csn_id_coded.nunique()

In [None]:
change_care.size

In [None]:
view_df(change_care[change_care.pat_enc_csn_id_coded.eq(change_care.shift().pat_enc_csn_id_coded)])

In [None]:
# get the trajectory for each csn
trajectory = change_care[['pat_enc_csn_id_coded', 'pat_lv_of_care']].groupby('pat_enc_csn_id_coded')['pat_lv_of_care'].apply(list).reset_index(name='trajectory')

view_df(trajectory)

In [None]:
# add a trajectory string column and also trajectory count
trajectory['trajectory_string'] = [' -> '.join(map(str, l)) for l in trajectory['trajectory']]
trajectory['trajectory_length'] = trajectory['trajectory'].str.len()

In [None]:
view_df(trajectory[trajectory.trajectory_length > 1])

In [None]:
# save this file
savefile = savedir + "02_trajectory.csv"
trajectory.to_csv(savefile)

# Combine the data into one dataframe

In [None]:
# format the first inpatient event
first_ip_less = first_ips_save[['anon_id', 'pat_enc_csn_id_coded',
       'pat_lv_of_care', 'event_type', 'pat_service']]

first_ip_less.rename({
                      'pat_service':'first_ip_pat_service',
                      'event_type':'first_ip_event_type',
                      'pat_lv_of_care':'first_ip_lv_of_care'
                     },
                    inplace=True, axis=1)

view_df(first_ip_less)

In [None]:
# format the last emerg event
last_emerg_less = last_emerg_save[['anon_id', 'pat_enc_csn_id_coded',
       'pat_lv_of_care', 'event_type', 'pat_service']]

last_emerg_less.rename({
                      'pat_service':'last_emerg_pat_service',
                      'event_type':'last_emerg_event_type',
                      'pat_lv_of_care':'last_emerg_lv_of_care'
                     },
                    inplace=True, axis=1)

view_df(last_emerg_less)

In [None]:
trajectory_less = trajectory[['pat_enc_csn_id_coded', 'trajectory_string', 'trajectory_length']]

trajectory_less.rename({'trajectory_string':'trajectory'},
                    inplace=True, axis=1)

view_df(trajectory_less)

In [None]:
ip_emerg = first_ip_less.merge(last_emerg_less, how='outer')
view_df(ip_emerg)

In [None]:
ip_emerg_traj = ip_emerg.merge(trajectory_less, how='outer')
view_df(ip_emerg_traj)

In [None]:
# save this combined df
savefile = savedir + "02_combined_ip_emerg_traj.csv"
ip_emerg_traj.to_csv(savefile)

# Description of columns

- first_ip_* = labels for the first inpatient event (admit time)
- last_emerg_* = labels for the last emergency event (just before admit time)
- trajectory = CSNs movement through care levels from admit time to 24 hours after admit
- trajectory_length = number of times CSN moves around in trajectory