In [1]:
import ast

import pandas as pd

In [None]:
# Load the machine-measurement table of the ECG dataset (one row per study).
measurements_csv = '../../data/mimic-iv-ecg-diagnostic-electrocardiogram-matched-subset-1.0/machine_measurements.csv'
df = pd.read_csv(measurements_csv)
df.head()

In [None]:
# Merge the per-line machine report columns into one lowercase text field.
# Missing report lines become empty text (casting NaN to str would inject the
# literal "nan"), and lines are joined with a space so that words from
# adjacent report lines cannot fuse together.
# NOTE(review): only report_1 .. report_17 are read, exactly as before; if the
# CSV also carries a report_0 column it is ignored — confirm this is intended.
report_cols = [f"report_{i}" for i in range(1, 18)]
report_text = df[report_cols]
report_text = report_text.astype(str).where(report_text.notna(), '')

merged_report = report_text[report_cols[0]]
for col in report_cols[1:]:
    merged_report = merged_report + ' ' + report_text[col]

df['reports'] = merged_report.str.lower()
df.head()

In [4]:
# Keep only the study key, the merged report text and the ECG timestamp,
# and index the frame by study.
kept_columns = ["study_id", "reports", "ecg_time"]
df = df.loc[:, kept_columns].set_index('study_id')

In [None]:
# Flag reports that indicate ST elevation: either an explicit
# "st elevation" / "stj elevation" phrase, or any "st" text that appears
# together with a myocardial ischemia / infarction statement.
# Fix: the second pattern used to be spelled "myocardial infraction", which a
# correctly spelled report can never contain; it now reads "infarction".
# NOTE(review): "st" is a plain substring test (it also hits e.g. "first");
# confirm whether a whole-word match was intended.
explicit_ste = df["reports"].str.contains("st elevation|stj elevation")
mentions_st = df["reports"].str.contains("st")
ischemic_statement = df["reports"].str.contains("myocardial ischemia|myocardial infarction")

df["st_elevation"] = explicit_ste | (mentions_st & ischemic_statement)
df.head()

In [None]:
# Flag reports that mention ST (or ST-junction) depression.
st_depression_pattern = "st depression|stj depression"
df["st_depression"] = df["reports"].str.contains(st_depression_pattern)
df.head()

In [None]:
# Flag reports containing "t wave inver" (the truncated stem covers
# "inversion", "inverted", ...).
t_wave_pattern = "t wave inver"
df["t_wave"] = df["reports"].str.contains(t_wave_pattern)
df.head()

In [8]:
# Load the per-record ICD-10 diagnosis labels that accompany the ECG studies.
icd_labels_csv = '../../data/mimic-iv-ecg-ext-icd-diagnostic-labels-for-mimic-iv-ecg-1.0.1/records_w_diag_icd10.csv'
icd_df = pd.read_csv(icd_labels_csv)


In [None]:
# Keep the two admission-id columns and the diagnosis list, indexed by study.
icd_columns = ['study_id', 'ed_hadm_id', 'hosp_hadm_id', 'all_diag_all']
icd_df = icd_df.loc[:, icd_columns].set_index('study_id')
icd_df.head()

In [None]:
# Attach the diagnosis labels to the ECG rows; both frames are indexed by
# study_id and only studies present in both are kept.
df = df.join(
    icd_df,
    how='inner',
    rsuffix='icd',
)
df.head()

In [None]:
# ICD-10 code prefixes (written without the dot) that are counted as an acute
# myocardial infarction: entries under I21, I22 and I23.
# NOTE(review): codes under I21.A (e.g. "I21A1") are not covered by these
# prefixes — confirm whether they should be.
Acute_MI = [
    "I210",
    "I211",
    "I212",
    "I213",
    "I214",
    "I219",
    "I220",
    "I221",
    "I228",
    "I229",
    "I230",
    "I231",
    "I232",
    "I233",
    "I234",
    "I235",
    "I236",
    "I238",
]

def isAcuteMI(all_diag_all):
    """Tell whether a record's diagnosis list contains an acute-MI code.

    Parameters
    ----------
    all_diag_all : str
        Text form of a Python list of ICD-10 code strings, e.g.
        "['I2109', 'E119']", as stored in the labels CSV.

    Returns
    -------
    bool
        True when at least one code starts with one of the ``Acute_MI``
        prefixes (so "I214" also covers longer codes that begin with it).

    Raises
    ------
    ValueError, SyntaxError
        If the text is not a plain Python literal.
    """
    # literal_eval only accepts literals, so text coming from the CSV can
    # never execute code the way eval() could.
    codes = ast.literal_eval(all_diag_all)
    # Prefix match instead of substring match: a prefix appearing in the
    # middle of an unrelated code must not count.
    mi_prefixes = tuple(Acute_MI)
    return any(code.startswith(mi_prefixes) for code in codes)

# Evaluate every record's diagnosis list and store the boolean acute-MI label.
acute_mi_flags = df['all_diag_all'].apply(isAcuteMI)
df['Acute_MI'] = acute_mi_flags
df.head()

In [None]:
# Number of rows labelled as acute MI (each True contributes 1 to the sum).
df['Acute_MI'].sum()

In [None]:
# Use the hospital admission id when present, otherwise fall back to the
# ED-linked admission id; the two source columns are then no longer needed.
admission_id = df['hosp_hadm_id'].fillna(df['ed_hadm_id'])
df = df.assign(hadm_id=admission_id).drop(columns=['hosp_hadm_id', 'ed_hadm_id'])
df.head()

In [None]:
# Load the discharge notes table.
discharge_csv = '../../data/mimic-iv-note/note/discharge.csv'
discharge_df = pd.read_csv(discharge_csv)
discharge_df.head()

In [None]:
# Only the admission id and the note text are needed; index by admission.
note_columns = ['hadm_id', 'text']
discharge_df = discharge_df.loc[:, note_columns].set_index('hadm_id')
discharge_df.head()

In [None]:
import re

# Default section headings extracted by clean_note().
SECTIONS = ['Chief Complaint', 'Past Medical History', 'Medications on Admission']

# A line that contains a colon is treated as the start of the next section.
_NEXT_HEADING = re.compile(r'\n[^\n:]+:')

def clean_note(note, sections=None):
    """Extract selected sections from a discharge note.

    For every heading, the text following the first "<heading>:" is kept up
    to (not including) the next line that contains a colon, and the pieces
    are concatenated in the order of ``sections``.

    Parameters
    ----------
    note : str
        Full note text. Any non-string value (e.g. a missing note read as
        NaN) yields an empty result.
    sections : iterable of str, optional
        Headings to extract. When omitted, the module-level ``SECTIONS`` is
        looked up at call time, so rebinding ``SECTIONS`` still changes what
        is extracted, as it did before this parameter existed.

    Returns
    -------
    str
        "<heading>:<content>\\n" for each heading found; '' if none is found.
    """
    if sections is None:
        sections = SECTIONS
    if not isinstance(note, str):
        return ''

    extracted = []
    for heading in sections:
        marker = heading + ':'
        # Only the text right after the first marker is needed.
        pieces = note.split(marker, 2)
        if len(pieces) == 1:
            continue  # heading not present in this note
        body = _NEXT_HEADING.split(pieces[1], maxsplit=1)[0]
        extracted.append(marker + body + '\n')
    return ''.join(extracted)

# First pass: clean_note runs with whatever SECTIONS is bound to at this
# point (the short list assigned just before the function definition).
discharge_df['simple_note'] = discharge_df['text'].apply(clean_note)

# clean_note reads the module-level SECTIONS when it is called, so rebinding
# the name here widens what the second pass extracts.
SECTIONS = [
    'Allergies',
    'Chief Complaint',
    'Major Surgical or Invasive Procedure',
    'History of Present Illness',
    'Past Medical History',
    'Social History',
    'Family History',
    'Medications on Admission',
]

discharge_df['full_note'] = discharge_df['text'].apply(clean_note)

# The raw text is not needed once both condensed versions exist.
discharge_df = discharge_df.drop(columns=['text'])
discharge_df.head()

In [17]:
# Preserve study_id as an ordinary column, re-key the frame by admission id
# and keep only the admissions that have a discharge note.
df = (
    df.assign(study_id=df.index)
    .set_index('hadm_id')
    .join(discharge_df, how='inner')
)

In [None]:
# Lab item ids to keep; the resulting rows later drive the 'troponin' flag.
# NOTE(review): presumably the troponin assay item ids — confirm against the
# lab item dictionary.
troponin_itemids = [51002, 51003, 52642]

# The lab events file is read in chunks of one million rows; from each chunk
# only rows of admissions present in df and of the wanted items are retained.
lab_chunks = []
with pd.read_csv('../../data/mimic-iv-3.0/hosp/labevents.csv', chunksize=10**6) as reader:
    for chunk in reader:
        in_cohort = chunk['hadm_id'].isin(df.index)
        wanted_item = chunk['itemid'].isin(troponin_itemids)
        lab_chunks.append(chunk[in_cohort & wanted_item])

lab_df = pd.concat(lab_chunks)
lab_df.head()

In [19]:
# The lab chart time is stored under the name 'ecg_time' so that both frames
# share the same time key; only the columns used afterwards are kept.
lab_columns = ['hadm_id', 'ecg_time', 'valuenum', 'comments']
lab_df = lab_df.assign(ecg_time=pd.to_datetime(lab_df['charttime']))[lab_columns]

# Parse the ECG acquisition time as a datetime as well.
df['ecg_time'] = pd.to_datetime(df['ecg_time'])

In [20]:
# Order both frames chronologically; merge_asof needs its inputs sorted on
# the time key.
df = df.sort_values('ecg_time')
lab_df = lab_df.sort_values('ecg_time')

In [21]:
# For every ECG, attach the lab row of the same admission whose time is
# closest to the ECG time (earlier or later; no maximum distance is set).
df = pd.merge_asof(
    df,
    lab_df,
    on='ecg_time',
    by='hadm_id',
    direction='nearest',
)

In [None]:
df = df.set_index('hadm_id')
# An admission counts as troponin-tested when at least one retained lab row
# belongs to it. A vectorized membership test replaces the former
# "fill False, then .loc[...] = True" pair: it gives the same flags and cannot
# raise KeyError for a lab admission id that is missing from df.
df['troponin'] = df.index.isin(lab_df['hadm_id'])
df.head()

In [23]:
# Step 1: for NSTEMI or STEMI (Acute_MI rows), label troponin testing as true.
tested_or_mi = df['troponin'] | df['Acute_MI']

# Step 2: rows without ST elevation, ST depression or T-wave inversion are
# labelled as not tested, i.e. the flag survives only with an ECG finding.
# NOTE(review): this also clears the flag for Acute_MI rows that have none of
# the three ECG findings — confirm that this is intended.
any_ecg_finding = df['st_elevation'] | df['st_depression'] | df['t_wave']
df['troponin'] = tested_or_mi & any_ecg_finding

In [None]:
# Boolean views used to split acute MI by the presence of ST elevation.
is_acute_mi = df['Acute_MI'].astype(bool)
has_st_elevation = df['st_elevation'].astype(bool)

# Store every label column as 0/1 integers.
for label in ['st_elevation', 'st_depression', 't_wave', 'Acute_MI', 'troponin']:
    df[label] = df[label].astype(int)

# STEMI: acute MI with ST elevation; NSTEMI: acute MI without it.
df['STEMI'] = (is_acute_mi & has_st_elevation).astype(int)
df['NSTEMI'] = (is_acute_mi & ~has_st_elevation).astype(int)

# The raw report text and diagnosis list are not part of the final table.
df = df.drop(columns=['reports', 'all_diag_all'])
df.head()

In [25]:
# Write the labelled table to disk; with pandas' default settings the index
# is written as the first CSV column.
df.to_csv('../../data/mimic-acute-mi.csv')