In [1]:
#This code compresses PROCEDURES_ICD9, and DIAGNOSES_ICD9 into single entries per admission (sorted by SEQ_NUM)
#and merges this data with the ADMISSIONS dataframe, providing two separate dataframes with this merged structure.
#One dataframe corresponds to patients diagnosed with specified ICD-9 codes and the other contains the control patients.

import numpy as np
import pandas as pd
import seaborn as sns
import datetime
import matplotlib.pyplot as plt

ADMISSIONS = pd.read_csv(r"C:\BIOE5860_Data\ADMISSIONS.csv")
DIAGNOSES_ICD = pd.read_csv(r"C:\BIOE5860_Data\DIAGNOSES_ICD.csv")
PATIENTS = pd.read_csv(r"C:\BIOE5860_Data\PATIENTS.csv")
PROCEDURES_ICD = pd.read_csv(r"C:\BIOE5860_Data\PROCEDURES_ICD.csv")

#Input ICD9 code that you want to look at here:
my_icd9_code = ["44100", "44101", "44102", "44103"] #441 is arotic dissection. Change to 421 for bacterial endocarditis
#check what any following numbers would be in the ICD9 code
#need to update to be 441.00, 441.01, 441.02, 441.03

#Returns patients with aortic dissection
AD_SUBJECT_ID = DIAGNOSES_ICD.loc[
    DIAGNOSES_ICD["ICD9_CODE"].astype(str).isin(my_icd9_code),
    "SUBJECT_ID"
].unique() 

"""
Adding filter to remove the text based AD diagnoses that don't have the ICD9 code
"""

# Also grab patients with aortic dissection in admission text
aortic_text_ids = ADMISSIONS[
    ADMISSIONS['DIAGNOSIS'].str.contains('AORTIC DISSECTION', na=False)
]['SUBJECT_ID'].unique()

# Combine both sets â€” these should ALL be excluded from controls
EXCLUDE_FROM_CONTROLS = np.union1d(AD_SUBJECT_ID, aortic_text_ids)

# Now build controls excluding both groups
CONTROL_SUBJECT_ID = DIAGNOSES_ICD.loc[
    ~DIAGNOSES_ICD["SUBJECT_ID"].isin(EXCLUDE_FROM_CONTROLS),
    "SUBJECT_ID"
].unique()

"""
Done adding
"""

"""
commenting out as it's now redundant
CONTROL_SUBJECT_ID = DIAGNOSES_ICD.loc[
    ~DIAGNOSES_ICD["SUBJECT_ID"].isin(AD_SUBJECT_ID), 
    "SUBJECT_ID"
].unique()
"""

#Returns the specific admissions where aortic dissection was diagnosed
AD_HADM_ID = DIAGNOSES_ICD.loc[
    DIAGNOSES_ICD["ICD9_CODE"].astype(str).isin(my_icd9_code),
    "HADM_ID"
].unique()


#Identify all diagnoses for patients diagnosed with aortic dissection, including for admissions where they were not diagnosed with AD
PATIENT_DIAGNOSES = DIAGNOSES_ICD[DIAGNOSES_ICD['SUBJECT_ID'].isin(AD_SUBJECT_ID)]

CONTROL_DIAGNOSES = DIAGNOSES_ICD[DIAGNOSES_ICD['SUBJECT_ID'].isin(CONTROL_SUBJECT_ID)]

#Return a new dataframe with all the ICD9 codes for each admission condensed into a single row,col val as a compressed list
PATIENT_DIAGNOSES = (
    PATIENT_DIAGNOSES
    .sort_values(['HADM_ID','SEQ_NUM'])
    .groupby(['SUBJECT_ID','HADM_ID'])['ICD9_CODE']
    .apply(list)
    .reset_index(name='DIAGNOSES')
)

#Return a new dataframe with all the ICD9 codes for each admission condensed into a single row,col val as a compressed list for control patients
CONTROL_DIAGNOSES = (
    CONTROL_DIAGNOSES
    .sort_values(['HADM_ID','SEQ_NUM'])
    .groupby(['SUBJECT_ID','HADM_ID'])['ICD9_CODE']
    .apply(list)
    .reset_index(name='DIAGNOSES')
)

#Remove DIAGNOSES_ICD to conserve memory since we have already filtered for the relevant data
#del DIAGNOSES_ICD

#Return all procedures for patients diagnosed with AD, including for admissions where they were not diagnosed with AD
PATIENT_PROCEDURES = PROCEDURES_ICD[PROCEDURES_ICD['SUBJECT_ID'].isin(AD_SUBJECT_ID)]

#identify all procedures for control patients as well
CONTROL_PROCEDURES = PROCEDURES_ICD[PROCEDURES_ICD['SUBJECT_ID'].isin(CONTROL_SUBJECT_ID)]

#Return a new dataframe with all procedure codes for each admission compressed into a single row,col val as a compressed list
PATIENT_PROCEDURES = (
    PATIENT_PROCEDURES
    .sort_values(['HADM_ID','SEQ_NUM'])
    .groupby(['SUBJECT_ID','HADM_ID'])['ICD9_CODE']
    .apply(list)
    .reset_index(name='PROCEDURE TYPE')
)

#Return a new dataframe with all procedure codes for each admission compressed into a single row,col val as a compressed list for control patients
CONTROL_PROCEDURES = (
    CONTROL_PROCEDURES
    .sort_values(['HADM_ID','SEQ_NUM'])
    .groupby(['SUBJECT_ID','HADM_ID'])['ICD9_CODE']
    .apply(list)
    .reset_index(name='PROCEDURE TYPE')
)

#Remove PROCEDURES_ICD to conserve memory since we have already extracted the relevant rows
#del PROCEDURES_ICD

#Return every admission entry for patients who were diagnosed with AD at some point
PATIENT_ADMISSIONS = ADMISSIONS[ADMISSIONS['SUBJECT_ID'].isin(AD_SUBJECT_ID)]

#pull control group admissions as well
CONTROL_ADMISSIONS = ADMISSIONS[ADMISSIONS['SUBJECT_ID'].isin(CONTROL_SUBJECT_ID)]

#Remove redundant columns from the other filtered dataframes for a cleaner merge
PATIENT_PROCEDURES.drop('SUBJECT_ID',inplace=True,axis=1)
PATIENT_DIAGNOSES.drop('SUBJECT_ID',inplace=True,axis=1)
CONTROL_PROCEDURES.drop('SUBJECT_ID',inplace=True,axis=1)
CONTROL_DIAGNOSES.drop('SUBJECT_ID',inplace=True,axis=1)

#Merge the compressed DFs engineered earlier with admissions so that each admission has lab event, diagnosis, and procedure data
PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS.merge(PATIENT_DIAGNOSES, on="HADM_ID", how="left") \
            .merge(PATIENT_PROCEDURES, on="HADM_ID", how="left")

CONTROL_ADMISSIONS_MERGED = CONTROL_ADMISSIONS.merge(CONTROL_DIAGNOSES, on="HADM_ID", how="left") \
            .merge(CONTROL_PROCEDURES, on="HADM_ID", how="left")

#Rename columns for clarity since there is a text-based labeling column and the ICD-9 diagnosis column
PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED.rename(columns={"DIAGNOSIS": "DIAGNOSIS (LABEL)","DIAGNOSES": "DIAGNOSIS (ICD_9)"})
CONTROL_ADMISSIONS_MERGED = CONTROL_ADMISSIONS_MERGED.rename(columns={"DIAGNOSIS": "DIAGNOSIS (LABEL)","DIAGNOSES": "DIAGNOSIS (ICD_9)"})

#Drop redundant row
PATIENT_ADMISSIONS_MERGED.drop(['ROW_ID'],inplace=True,axis=1)
CONTROL_ADMISSIONS_MERGED.drop(['ROW_ID'],inplace=True,axis=1)

#Identify the admissions where AD was one of the diagnoses given to the patients, excluding admissions where AD was not diagnosed
#No need to do this for control group
DISEASE_ADMISSIONS = PATIENT_ADMISSIONS_MERGED[PATIENT_ADMISSIONS_MERGED['HADM_ID'].isin(AD_HADM_ID)]
DISEASE_ADMISSIONS = DISEASE_ADMISSIONS.copy()

#Convert ADMITTIME to datetime for processing
DISEASE_ADMISSIONS['ADMITTIME'] = pd.to_datetime(DISEASE_ADMISSIONS["ADMITTIME"], errors="coerce")

#convert to datetime for control group
CONTROL_ADMISSIONS_MERGED['ADMITTIME'] = pd.to_datetime(CONTROL_ADMISSIONS_MERGED["ADMITTIME"], errors="coerce")

#Sort by HADM_ID and ADMITTIME to get a sorted list for processing
DISEASE_ADMISSIONS = DISEASE_ADMISSIONS.sort_values(['HADM_ID','ADMITTIME'])

#sort control group by HADM_ID and ADMITTIME as well
CONTROL_ADMISSIONS_MERGED = CONTROL_ADMISSIONS_MERGED.sort_values(['HADM_ID','ADMITTIME'])

#Identify the earliest admission time in which patients were diagnosed with AD
DISEASE_FIRST_ADMISSIONS = DISEASE_ADMISSIONS.groupby('SUBJECT_ID',as_index=False)['ADMITTIME'].min()

#Rename this column to "Comparator" since it will be used for filtering admissions from after the patient was diagnosed with AD
DISEASE_FIRST_ADMISSIONS = DISEASE_FIRST_ADMISSIONS.rename(columns={"ADMITTIME": "Comparator"})

#Update ADMISSIONS_MERGED so it now contains all admissions for patients who were diagnosed with AD at some point
#Prior and including the admission with their first diagnosis of AD. Admissions after their first diagnosis are excluded
PATIENT_ADMISSIONS_MERGED['ADMITTIME'] = pd.to_datetime(PATIENT_ADMISSIONS_MERGED["ADMITTIME"], errors="coerce")
PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED.merge(DISEASE_FIRST_ADMISSIONS,on='SUBJECT_ID',how="left")
PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED[PATIENT_ADMISSIONS_MERGED['ADMITTIME']<=PATIENT_ADMISSIONS_MERGED['Comparator']]

#drop the comparator column now that filtering is done so that the DFs are the same
PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED.drop(['Comparator'],axis=1)

In [2]:
#Thomas's logic retained below

print(PATIENT_ADMISSIONS_MERGED.columns)
print(PATIENT_ADMISSIONS_MERGED.shape)

print(CONTROL_ADMISSIONS_MERGED.columns)
print(CONTROL_ADMISSIONS_MERGED.shape)

Index(['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS (LABEL)', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'DIAGNOSIS (ICD_9)', 'PROCEDURE TYPE'],
      dtype='str')
(378, 20)
Index(['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS (LABEL)', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'DIAGNOSIS (ICD_9)', 'PROCEDURE TYPE'],
      dtype='str')
(58440, 20)


In [3]:
chunksize = 50000
LAB_EVENTS = pd.read_csv(r"C:\BIOE5860_Data\LABEVENTS.csv",chunksize=chunksize)
LAB_IDS = pd.read_csv(r"C:\BIOE5860_Data\Lab_Item_Codes.txt", sep="\t")

PATIENT_LAB_EVENTS2 = []
CONTROL_LAB_EVENTS2 = []

labs_of_interest = [
    'TROPONIN', 'D-DIMER', 'CREATININE', 'BUN', 
    'UREA', 'C-REACTIVE', 'LDH', 'LACTATE DEHYDROGENASE', 
    'BILIRUBIN', 'AST', 'ALT', 
    'WHITE BLOOD', 'WBC', 'LYMPHOCYTE', 'NEUTROPHIL'
]

#create one large string which the string matcher will search through
pattern = '|'.join(labs_of_interest)

#check for any of the strings listed above in the pattern mega-string
lab_ids = LAB_IDS[
    LAB_IDS['Display'].str.contains(pattern, case=False, na=False)
]
lab_ids = lab_ids['Code']

for EVENT in LAB_EVENTS:

    unique_admission_count = len(EVENT['HADM_ID'].unique())
    unique_admission_count += unique_admission_count

    EVENT = EVENT.loc[
        EVENT['ITEMID'].isin(lab_ids)
    ]
    
    #Identify all patient labs for patients who were diagnosed with AD at any point. Do not include labs taken during admissions following their first diagnosis
    PATIENT_LAB_EVENTS = EVENT[EVENT['HADM_ID'].isin(PATIENT_ADMISSIONS_MERGED['HADM_ID'])]
    
    #pull control group lab events as well
    CONTROL_LAB_EVENTS = EVENT[EVENT['HADM_ID'].isin(CONTROL_ADMISSIONS_MERGED['HADM_ID'])]
    
    #Remove redundant rows
    PATIENT_LAB_EVENTS = PATIENT_LAB_EVENTS.drop(['ROW_ID','VALUE'],axis=1)
    CONTROL_LAB_EVENTS = CONTROL_LAB_EVENTS.drop(['ROW_ID','VALUE'],axis=1)
    
    #Convert CHARTTIME to a datetime for sorting
    PATIENT_LAB_EVENTS['CHARTTIME'] = pd.to_datetime(PATIENT_LAB_EVENTS["CHARTTIME"], errors="coerce")
    CONTROL_LAB_EVENTS['CHARTTIME'] = pd.to_datetime(CONTROL_LAB_EVENTS["CHARTTIME"], errors="coerce")
    
    #Only return values that are not na.
    PATIENT_LAB_EVENTS = PATIENT_LAB_EVENTS[PATIENT_LAB_EVENTS['HADM_ID'].notna()]
    CONTROL_LAB_EVENTS = CONTROL_LAB_EVENTS[CONTROL_LAB_EVENTS['HADM_ID'].notna()]

    PATIENT_LAB_EVENTS2.append(PATIENT_LAB_EVENTS)
    CONTROL_LAB_EVENTS2.append(CONTROL_LAB_EVENTS)
    
#Remove LAB_EVENTS to conserve lots of memory since we already have extracted the necessary data
del LAB_EVENTS

#Construct final labs DFs through concatenation
FINAL_PATIENT_LABS = pd.concat(PATIENT_LAB_EVENTS2)
FINAL_CONTROL_LABS = pd.concat(CONTROL_LAB_EVENTS2)

print(unique_admission_count)

136


In [4]:
def apply_event_index_filter(PATIENT_ADMISSIONS_MERGED, CONTROL_ADMISSIONS_MERGED, AD_HADM_ID):

    PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED.copy()
    CONTROL_ADMISSIONS_MERGED = CONTROL_ADMISSIONS_MERGED.copy()

    PATIENT_ADMISSIONS_MERGED["ADMITTIME"] = pd.to_datetime(
        PATIENT_ADMISSIONS_MERGED["ADMITTIME"], errors="coerce"
    )

    CONTROL_ADMISSIONS_MERGED["ADMITTIME"] = pd.to_datetime(
        CONTROL_ADMISSIONS_MERGED["ADMITTIME"], errors="coerce"
    )

    PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED.sort_values(
        ["SUBJECT_ID", "ADMITTIME"]
    )

    """
    PATIENT_ADMISSIONS_MERGED["PATIENT_ADMISSION_INDEX"] = (
        PATIENT_ADMISSIONS_MERGED
        .groupby("SUBJECT_ID")
        .cumcount() + 1
    )
    """

    DISEASE_ADMISSIONS = PATIENT_ADMISSIONS_MERGED[
        PATIENT_ADMISSIONS_MERGED["HADM_ID"].isin(AD_HADM_ID)
    ].copy()

    DISEASE_ADMISSIONS = DISEASE_ADMISSIONS.sort_values(
        ["SUBJECT_ID", "ADMITTIME"]
    )

    DISEASE_FIRST_ADMISSIONS = (
        DISEASE_ADMISSIONS
        .groupby("SUBJECT_ID", as_index=False)["ADMITTIME"]
        .min()
        .rename(columns={"ADMITTIME": "Comparator"})
    )

    PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED.merge(
        DISEASE_FIRST_ADMISSIONS,
        on="SUBJECT_ID",
        how="left"
    )

    PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED[
        (PATIENT_ADMISSIONS_MERGED["Comparator"].isna()) |
        (PATIENT_ADMISSIONS_MERGED["ADMITTIME"] <= PATIENT_ADMISSIONS_MERGED["Comparator"])
    ]

    PATIENT_ADMISSIONS_MERGED.loc[PATIENT_ADMISSIONS_MERGED["Comparator"].notna(), "PATIENT_ADMISSION_INDEX"] = (
        PATIENT_ADMISSIONS_MERGED[PATIENT_ADMISSIONS_MERGED["Comparator"].notna()]
        .groupby("SUBJECT_ID")["ADMITTIME"]
        .rank(method="first", ascending=False)
        .astype(int) - 1
    )
    
    PATIENT_ADMISSIONS_MERGED["PATIENT_ADMISSION_INDEX"] = PATIENT_ADMISSIONS_MERGED["PATIENT_ADMISSION_INDEX"].astype("float")

    PATIENT_ADMISSIONS_MERGED = PATIENT_ADMISSIONS_MERGED.drop(columns=["Comparator"])

    # Keep control DF columns consistent
    CONTROL_ADMISSIONS_MERGED["ADMISSION_INDEX_PER_PATIENT"] = np.nan

    return PATIENT_ADMISSIONS_MERGED, CONTROL_ADMISSIONS_MERGED

In [5]:
PATIENT_ADMISSIONS_MERGED, CONTROL_ADMISSIONS_MERGED = apply_event_index_filter(PATIENT_ADMISSIONS_MERGED, CONTROL_ADMISSIONS_MERGED, AD_HADM_ID)

In [6]:
#display unique ICD9 codes in the filtered patient admissions
#print(PATIENT_ADMISSIONS_MERGED['DIAGNOSIS (ICD_9)'].explode().unique())
#print(CONTROL_ADMISSIONS_MERGED['DIAGNOSIS (ICD_9)'].explode().unique())

print(PATIENT_ADMISSIONS_MERGED.columns)
print(PATIENT_ADMISSIONS_MERGED.shape)
print(CONTROL_ADMISSIONS_MERGED.columns)
print(CONTROL_ADMISSIONS_MERGED.shape)

Index(['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS (LABEL)', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'DIAGNOSIS (ICD_9)', 'PROCEDURE TYPE',
       'PATIENT_ADMISSION_INDEX'],
      dtype='str')
(378, 21)
Index(['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS (LABEL)', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'DIAGNOSIS (ICD_9)', 'PROCEDURE TYPE',
       'ADMISSION_INDEX_PER_PATIENT'],
      dtype='str')
(58440, 21)


In [7]:
PATIENT_ADMISSIONS_MERGED.to_csv(r"C:\BIOE5860_Data\PATIENT_ADMISSIONS_MERGED.csv", index=False)
CONTROL_ADMISSIONS_MERGED.to_csv(r"C:\BIOE5860_Data\CONTROL_ADMISSIONS_MERGED.csv", index=False)

In [8]:
#this script finds all patients who have 'AORTIC DISSECTION' in their ADMISSIONS DIAGNOSIS text field
#but do not have the corresponding ICD9 code in DIAGNOSES_ICD
#my_icd9_code = [whatever yours is]
 
aortic_ids = ADMISSIONS[ADMISSIONS['DIAGNOSIS'].str.contains('AORTIC DISSECTION', na=False)]['SUBJECT_ID'].unique()
 
"""
AD_SUBJECT_ID = DIAGNOSES_ICD.loc[
    DIAGNOSES_ICD["ICD9_CODE"].astype(str).isin(my_icd9_code),
    "SUBJECT_ID"
].unique()
"""
 
#aortic_patients = PATIENT_DIAGNOSES[PATIENT_DIAGNOSES['SUBJECT_ID']].unique()
 
#find patients who have AORTIC DISSECTION text but don't have the ICD9 code from my_icd9_code

difference = set(aortic_ids) - set(AD_SUBJECT_ID)
print(len(set(difference) & set(CONTROL_SUBJECT_ID))) #if this is 0 then all were successfully removed in first cell

print(len(difference))
print(difference)
 
#remove all subject IDs found above from CONTROL_SUBJECT_ID since we don't know if they are actually controls 
#or if they are patients with AD who just don't have the correct ICD9 code in the DIAGNOSES_ICD file
#CONTROL_SUBJECT_ID = CONTROL_SUBJECT_ID[~np.isin(CONTROL_SUBJECT_ID, list(difference))]

#PATIENT_ADMISSIONS_MERGED.to_csv(r"C:\BIOE5860_Data\PATIENT_ADMISSIONS_MERGED.csv", index=False)
#CONTROL_ADMISSIONS_MERGED.to_csv(r"C:\BIOE5860_Data\CONTROL_ADMISSIONS_MERGED.csv", index=False)

print(PATIENT_ADMISSIONS_MERGED.columns)
print(PATIENT_ADMISSIONS_MERGED.shape)
print(CONTROL_ADMISSIONS_MERGED.columns)
print(CONTROL_ADMISSIONS_MERGED.shape)

0
17
{np.int64(83040), np.int64(56898), np.int64(41603), np.int64(26308), np.int64(25828), np.int64(5353), np.int64(682), np.int64(19914), np.int64(24652), np.int64(48078), np.int64(31377), np.int64(16181), np.int64(21206), np.int64(95030), np.int64(13208), np.int64(6938), np.int64(5183)}
Index(['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS (LABEL)', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'DIAGNOSIS (ICD_9)', 'PROCEDURE TYPE',
       'PATIENT_ADMISSION_INDEX'],
      dtype='str')
(378, 21)
Index(['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS (LABEL)', 'HOSPITAL_EXPI