In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import datetime
import matplotlib.pyplot as plt

In [60]:
ADMISSIONS = pd.read_csv("./ADMISSIONS.csv")
DIAGNOSES_ICD = pd.read_csv("./DIAGNOSES_ICD.csv")
PATIENTS = pd.read_csv("./PATIENTS.csv")
PROCEDURES_ICD = pd.read_csv("./PROCEDURES_ICD.csv")
LAB_EVENTS = pd.read_csv("./LABEVENTS.csv")

In [61]:
#Returns patients with aortic dissection
AD_SUBJECT_ID = DIAGNOSES_ICD.loc[
    DIAGNOSES_ICD["ICD9_CODE"].astype(str).str.startswith("441"),
    "SUBJECT_ID"
].unique() 

#Returns the specific admissions where aortic dissection was diagnosed
AD_HADM_ID = DIAGNOSES_ICD.loc[
    DIAGNOSES_ICD["ICD9_CODE"].astype(str).str.startswith("441"),
    "HADM_ID"
].unique() 

#Identify all diagnoses for patients diagnosed with aortic dissection, including for admissions where they were not diagnosed with AD
PATIENT_DIAGNOSES = DIAGNOSES_ICD[DIAGNOSES_ICD['SUBJECT_ID'].isin(AD_SUBJECT_ID)]

#Return a new dataframe with all the ICD9 codes for each admission condensed into a single row,col val as a compressed list
PATIENT_DIAGNOSES = (
    PATIENT_DIAGNOSES
    .sort_values(['HADM_ID','SEQ_NUM'])
    .groupby(['SUBJECT_ID','HADM_ID'])['ICD9_CODE']
    .apply(list)
    .reset_index(name='DIAGNOSES')
)

#Remove DIAGNOSES_ICD to conserve memory since we have already filtered for the relevant data
del DIAGNOSES_ICD

In [62]:
#Return all procedures for patients diagnosed with AD, including for admissions where they were not diagnosed with AD
PATIENT_PROCEDURES = PROCEDURES_ICD[PROCEDURES_ICD['SUBJECT_ID'].isin(AD_SUBJECT_ID)]

#Return a new dataframe with all procedure codes for each admission compressed into a single row,col val as a compressed list
    PATIENT_PROCEDURES
    .sort_values(['HADM_ID','SEQ_NUM'])
    .groupby(['SUBJECT_ID','HADM_ID'])['ICD9_CODE']
    .apply(list)
    .reset_index(name='PROCEDURE TYPE')
)

#Remove PROCEDURES_ICD to conserve memory since we have already extracted the relevant rows
del PROCEDURES_ICD

In [63]:
#Identify all patient labs for patients who were diagnosed with AD at any point
PATIENT_LAB_EVENTS = LAB_EVENTS[LAB_EVENTS['SUBJECT_ID'].isin(AD_SUBJECT_ID)]

#Remove redundant rows
PATIENT_LAB_EVENTS = PATIENT_LAB_EVENTS.drop(['ROW_ID','VALUE'],axis=1)

#Convert CHARTTIME to a datetime for sorting
PATIENT_LAB_EVENTS['CHARTTIME'] = pd.to_datetime(PATIENT_LAB_EVENTS["CHARTTIME"], errors="coerce")

#Only return values that are not na.
PATIENT_LAB_EVENTS = PATIENT_LAB_EVENTS[PATIENT_LAB_EVENTS['HADM_ID'].notna()]

#Remove LAB_EVENTS to conserve lots of memory since we already have extracted the necessary data
del LAB_EVENTS

#Return a condensed lab events DF with CHARTTIME, ITEMID, VALUENUM, and FLAG for analytics and ML later.
PATIENT_LAB_EVENTS = (
    PATIENT_LAB_EVENTS
    .sort_values(['HADM_ID','CHARTTIME'])
    .groupby('HADM_ID')
    .apply(lambda df: list(zip(df.CHARTTIME, df.ITEMID, df.VALUENUM, df.FLAG)))
    .reset_index(name='Lab Events')
)

In [64]:
#Return every admission entry for patients who were diagnosed with AD at some point
PATIENT_ADMISSIONS = ADMISSIONS[ADMISSIONS['SUBJECT_ID'].isin(AD_SUBJECT_ID)]

#Remove redundant columns from the other filtered dataframes for a cleaner merge
PATIENT_PROCEDURES.drop('SUBJECT_ID',inplace=True,axis=1)
PATIENT_DIAGNOSES.drop('SUBJECT_ID',inplace=True,axis=1)

In [65]:
#Merge the compressed DFs engineered earlier with admissions so that each admission has lab event, diagnosis, and procedure data
ADMISSIONS_MERGED = PATIENT_ADMISSIONS.merge(PATIENT_LAB_EVENTS, on="HADM_ID", how="left") \
            .merge(PATIENT_DIAGNOSES, on="HADM_ID", how="left") \
            .merge(PATIENT_PROCEDURES, on="HADM_ID", how="left")

#Rename columns for clarity since there is a text-based labeling column and the ICD-9 diagnosis column
ADMISSIONS_MERGED = ADMISSIONS_MERGED.rename(columns={"DIAGNOSIS": "DIAGNOSIS (LABEL)","DIAGNOSES": "DIAGNOSIS (ICD_9)"})

#Drop redundant row
ADMISSIONS_MERGED.drop(['ROW_ID'],inplace=True,axis=1)

In [67]:
#Identify the admissions where AD was one of the diagnoses given to the patients, excluding admissions where AD was not diagnosed
AD_ADMISSIONS = ADMISSIONS_MERGED[ADMISSIONS_MERGED['HADM_ID'].isin(AD_HADM_ID)]

#Convert ADMITTIME to datetime for processing
AD_ADMISSIONS['ADMITTIME'] = pd.to_datetime(AD_ADMISSIONS["ADMITTIME"], errors="coerce")

#Sort by HADM_ID and ADMITTIME to get a sorted list for processing
AD_ADMISSIONS = AD_ADMISSIONS.sort_values(['HADM_ID','ADMITTIME'])

#Identify the earliest admission time in which patients were diagnosed with AD
AD_FIRST_ADMISSIONS = AD_ADMISSIONS.groupby('SUBJECT_ID',as_index=False)['ADMITTIME'].min()

#Rename this column to "Comparator" since it will be used for filtering admissions from after the patient was diagnosed with AD
AD_FIRST_ADMISSIONS = AD_FIRST_ADMISSIONS.rename(columns={"ADMITTIME": "Comparator"})

In [68]:
#Update ADMISSIONS_MERGED so it now contains all admissions for patients who were diagnosed with AD at some point
#Prior and including the admission with their first diagnosis of AD. Admissions after their first diagnosis are excluded

ADMISSIONS_MERGED = ADMISSIONS_MERGED.merge(AD_FIRST_ADMISSIONS,on='SUBJECT_ID',how="left")
ADMISSIONS_MERGED = ADMISSIONS_MERGED[ADMISSIONS_MERGED['ADMITTIME']<=ADMISSIONS_MERGED['Comparator']]

In [69]:
ADMISSIONS_MERGED

Unnamed: 0,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,...,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS (LABEL),HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,Lab Events,DIAGNOSIS (ICD_9),PROCEDURE TYPE,Comparator
0,364,136153,2130-05-22 20:10:00,2130-05-30 15:15:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Medicare,,...,UNKNOWN/NOT SPECIFIED,,,MASSIVE HEMOPTYSIS,0,1,"[(2130-05-22 20:38:00, 50861, 9.0, nan), (2130...","[7863, 496, 9973, 5070, 2851, 4414]","[3327, 3324, 3891, 3893, 9671, 9604]",2130-05-22 20:10:00
1,85,116630,2162-03-02 14:04:00,2162-03-10 13:15:00,,EMERGENCY,CLINIC REFERRAL/PREMATURE,REHAB/DISTINCT PART HOSP,Medicare,ENGL,...,WHITE,,,AORTIC STENOSIS\CARDIAC CATH,0,1,"[(2162-03-02 07:40:00, 50868, 15.0, nan), (216...","[4241, 20280, V4282, 99811, 3320, 30000, 53081...","[3521, 3722, 3611, 3615, 3845, 3403, 8856, 396...",2162-03-02 14:04:00
3,100,153952,2157-08-10 07:15:00,2157-08-18 19:54:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME,Private,,...,UNKNOWN/NOT SPECIFIED,,,AORTIC INSUFFICIENCY\AORTIC VALVE REPLACEMENT;...,0,1,"[(2157-08-10 08:10:00, 50800, nan, nan), (2157...","[99602, 4241, 4411, 4260, 42731]","[3522, 3844, 3961, 3783, 3772]",2157-08-10 07:15:00
4,124,112906,2161-12-17 03:39:00,2161-12-24 15:35:00,,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Medicare,,...,WHITE,2161-12-16 18:57:00,2161-12-17 12:35:00,"CHEST PAIN,R/O MI",0,1,"[(2161-12-17 06:20:00, 50868, 12.0, nan), (216...","[4412, 486, 496, 07070, 4478, 41402, 41401, 40...","[3601, 3606, 8856, 8842]",2161-12-17 03:39:00
7,145,198161,2144-03-29 01:44:00,2144-07-14 16:15:00,,EMERGENCY,EMERGENCY ROOM ADMIT,REHAB/DISTINCT PART HOSP,Medicaid,,...,WHITE,2144-03-28 23:46:00,2144-03-29 04:00:00,THORACIC DISSECTION,0,1,"[(2144-03-29 00:01:00, 50861, 97.0, abnormal),...","[44101, 5185, 99674, 56962, 9971, 42731, 99859...","[3954, 311, 3950, 3950, 3954, 3926, 4562, 4573...",2144-03-29 01:44:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2141,96254,140723,2129-08-14 14:02:00,2129-08-14 20:03:00,2129-08-14 20:03:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,ENGL,...,UNKNOWN/NOT SPECIFIED,,,MASSIVE HEMOPTYSIS,1,1,"[(2129-08-14 13:18:00, 50868, 10.0, nan), (212...","[1628, 78630, 49121, 4271, 2875, 4414, V8741, ...","[3979, 9671, 8842, 8844, 8848, 3322, 9605, 9960]",2129-08-14 14:02:00
2142,90680,193278,2180-08-28 12:00:00,2180-09-06 18:33:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,ENGL,...,WHITE,,,AORTIC STENOSIS\AORTIC VALVE / ASCENDING AORTA...,0,1,"[(2180-08-28 06:47:00, 50802, -2.0, nan), (218...","[4241, 42820, 5990, 2762, 5180, 4412, 41401, 3...","[3521, 3615, 3845, 3761, 3961]",2180-08-28 12:00:00
2143,90700,155233,2126-12-02 17:50:00,2126-12-09 19:00:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Medicare,ENGL,...,UNKNOWN/NOT SPECIFIED,,,VENTRICULAR TACHYCARDIA,0,1,"[(2126-12-03 05:10:00, 50861, 17.0, nan), (212...","[4271, 45381, 42820, 41410, 45829, 25000, 5853...","[3734, 64, 3726, 3727]",2126-12-02 17:50:00
2144,95408,121497,2150-07-14 17:36:00,2150-07-21 14:02:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,ENGL,...,WHITE,,,THORACOABDOMINAL ANEURYSM/SDA,0,1,"[(2150-07-14 14:57:00, 51355, 24.0, nan), (215...","[4417, 4019, 53081, 2720]","[3844, 3924]",2150-07-14 17:36:00
