In [1]:
%%html
<style type='text/css'>
/* Apply one font family and size to the three notebook selectors below. */
.CodeMirror,
div.output_area pre,
div.text_cell_render {
    /* The generic `monospace` family is the fallback when Inconsolata is not
       available in the viewer's browser. */
    font-family: Inconsolata, monospace;
    font-size: 13px;
}
</style>

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm, tqdm_notebook
from datetime import datetime, timedelta

In [3]:
# Root directory of the dataset files; every later cell builds its file paths from it.
path_dataset = r"/data/data2/041/datasets/mimic-iii-clinical-database-1.4"

# Collect the names of all directory entries whose name ends with "csv.gz"
# (order is whatever os.listdir returns).
list_csvgz = []
for entry_name in os.listdir(path_dataset):
    if entry_name.endswith("csv.gz"):
        list_csvgz.append(entry_name)
list_csvgz

['DRGCODES.csv.gz',
 'LABEVENTS.csv.gz',
 'PATIENTS.csv.gz',
 'D_ITEMS.csv.gz',
 'DIAGNOSES_ICD.csv.gz',
 'CAREGIVERS.csv.gz',
 'SERVICES.csv.gz',
 'CPTEVENTS.csv.gz',
 'OUTPUTEVENTS.csv.gz',
 'D_LABITEMS.csv.gz',
 'D_ICD_DIAGNOSES.csv.gz',
 'LABEVENTS_NEW_remove_duplicate_edges.csv.gz',
 'CALLOUT.csv.gz',
 'ADMISSIONS.csv.gz',
 'D_CPT.csv.gz',
 'INPUTEVENTS_MV.csv.gz',
 'D_LABITEMS_NEW.csv.gz',
 'PROCEDUREEVENTS_MV.csv.gz',
 'ADMISSIONS_NEW.csv.gz',
 'PRESCRIPTIONS.csv.gz',
 'NOTEEVENTS.csv.gz',
 'DATETIMEEVENTS.csv.gz',
 'MICROBIOLOGYEVENTS.csv.gz',
 'ICUSTAYS.csv.gz',
 'D_ICD_PROCEDURES.csv.gz',
 'LABEVENTS_NEW.csv.gz',
 'CHARTEVENTS.csv.gz',
 'TRANSFERS.csv.gz',
 'PROCEDURES_ICD.csv.gz',
 'INPUTEVENTS_CV.csv.gz']

# Prepare the necessary `DataFrame`s

In [4]:
# Read the LABEVENTS_NEW_remove_duplicate_edges table and print its column/dtype summary.
labevents_dedup_path = os.path.join(path_dataset, "LABEVENTS_NEW_remove_duplicate_edges.csv.gz")
df_new_labevents_remove_duplicate_edges = pd.read_csv(labevents_dedup_path)
df_new_labevents_remove_duplicate_edges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15309100 entries, 0 to 15309099
Data columns (total 14 columns):
 #   Column             Dtype  
---  ------             -----  
 0   Unnamed: 0         int64  
 1   Unnamed: 0.1       int64  
 2   ROW_ID             int64  
 3   SUBJECT_ID         int64  
 4   HADM_ID            float64
 5   ITEMID             int64  
 6   CHARTTIME          object 
 7   VALUE              object 
 8   VALUENUM           float64
 9   VALUEUOM           object 
 10  FLAG               object 
 11  CATAGORY           float64
 12  VALUENUM_Z-SCORED  float64
 13  TIMESTEP           float64
dtypes: float64(5), int64(5), object(4)
memory usage: 1.6+ GB


In [5]:
# Read the D_LABITEMS table (joined later on ITEMID to obtain LABEL) and print its summary.
d_labitems_path = os.path.join(path_dataset, "D_LABITEMS.csv.gz")
df_d_labitems = pd.read_csv(d_labitems_path)
df_d_labitems.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 753 entries, 0 to 752
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ROW_ID      753 non-null    int64 
 1   ITEMID      753 non-null    int64 
 2   LABEL       753 non-null    object
 3   FLUID       753 non-null    object
 4   CATEGORY    753 non-null    object
 5   LOINC_CODE  585 non-null    object
dtypes: int64(2), object(4)
memory usage: 35.4+ KB


In [10]:
# Read the PRESCRIPTIONS table.
# GSN holds zero-padded codes (e.g. 029832), so it is read as text rather than left to
# chunk-wise dtype inference. The recorded run of this cell emitted a warning at read
# time -- presumably pandas' mixed-type DtypeWarning for this column; TODO confirm.
df_prescriptions = pd.read_csv(
    os.path.join(path_dataset, "PRESCRIPTIONS.csv.gz"),
    dtype={"GSN": str},
)

# Add parsed datetime copies of the prescription dates. STARTDATE / ENDDATE themselves
# stay as raw strings. NOTE: despite their names, ADMITTIME / DISCHTIME here hold the
# prescription start / end dates; the names are kept because later cells may rely on them.
df_prescriptions["ADMITTIME"] = pd.to_datetime(df_prescriptions["STARTDATE"], format="%Y-%m-%d %H:%M:%S")
df_prescriptions["DISCHTIME"] = pd.to_datetime(df_prescriptions["ENDDATE"], format="%Y-%m-%d %H:%M:%S")

# Order rows by the raw STARTDATE strings. With the zero-padded, year-first layout parsed
# above, string order matches chronological order. Reassigning instead of sorting in
# place keeps the cell free of hidden in-place mutation.
df_prescriptions = df_prescriptions.sort_values(by="STARTDATE")

df_prescriptions.info()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


<class 'pandas.core.frame.DataFrame'>
Int64Index: 4156450 entries, 1457891 to 4151306
Data columns (total 21 columns):
 #   Column             Dtype         
---  ------             -----         
 0   ROW_ID             int64         
 1   SUBJECT_ID         int64         
 2   HADM_ID            int64         
 3   ICUSTAY_ID         float64       
 4   STARTDATE          object        
 5   ENDDATE            object        
 6   DRUG_TYPE          object        
 7   DRUG               object        
 8   DRUG_NAME_POE      object        
 9   DRUG_NAME_GENERIC  object        
 10  FORMULARY_DRUG_CD  object        
 11  GSN                object        
 12  NDC                float64       
 13  PROD_STRENGTH      object        
 14  DOSE_VAL_RX        object        
 15  DOSE_UNIT_RX       object        
 16  FORM_VAL_DISP      object        
 17  FORM_UNIT_DISP     object        
 18  ROUTE              object        
 19  ADMITTIME          datetime64[ns]
 20  DISCHTIME         

In [14]:
# Read the PROCEDURES_ICD table and the D_ICD_PROCEDURES table that is merged onto it
# (by ICD9_CODE) further down.
# NOTE(review): ICD9_CODE is left to pandas' dtype inference in both tables; if the codes
# can carry leading zeros, reading them as text may be safer -- confirm against the data.
procedures_icd_path = os.path.join(path_dataset, "PROCEDURES_ICD.csv.gz")
d_icd_procedures_path = os.path.join(path_dataset, "D_ICD_PROCEDURES.csv.gz")

df_procedures_icd = pd.read_csv(procedures_icd_path)
df_d_icd_procedures = pd.read_csv(d_icd_procedures_path)

# Explore a specific admission (`HADM_ID`)

In [6]:
# Admission inspected throughout the remaining cells.
HADM_ID_temp = 199998

# Lab events of that admission, with the LABEL of each ITEMID attached, ordered by TIMESTEP.
df_temp = (
    df_new_labevents_remove_duplicate_edges
    .loc[df_new_labevents_remove_duplicate_edges["HADM_ID"] == HADM_ID_temp]
    .merge(df_d_labitems[["ITEMID", "LABEL"]], how="left", on="ITEMID")
    .sort_values(by="TIMESTEP")
)

# Show the rows whose CATAGORY is not 0 ("CATAGORY" is the column's spelling in the data).
df_temp.loc[df_temp["CATAGORY"] != 0]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,ROW_ID,SUBJECT_ID,HADM_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM,FLAG,CATAGORY,VALUENUM_Z-SCORED,TIMESTEP,LABEL
49,21445317,21445317,16833719,27200,199998.0,50920,2119-02-18 17:58:00,See Comments,,,,1.0,0.0,0.0,Estimated GFR (MDRD equation)
141,21445431,21445431,16833738,27200,199998.0,51464,2119-02-18 19:49:00,NEG,,mg/dL,,1.0,0.0,0.0,Bilirubin
143,21445433,21445433,16833739,27200,199998.0,51466,2119-02-18 19:49:00,SM,,,,3.0,0.0,0.0,Blood
146,21445436,21445436,16833743,27200,199998.0,51487,2119-02-18 19:49:00,NEG,,,,1.0,0.0,0.0,Nitrite
152,21445442,21445442,16833747,27200,199998.0,51506,2119-02-18 19:49:00,Clear,,,,1.0,0.0,0.0,Urine Appearance
0,21445171,21445171,16833896,27200,199998.0,50800,2119-02-20 23:10:00,ART,,,,1.0,0.0,2.0,SPECIMEN TYPE
9,21445225,21445225,16833813,27200,199998.0,50812,2119-02-20 12:59:00,INTUBATED,,,,1.0,0.0,2.0,Intubated
18,21445286,21445286,16833804,27200,199998.0,50828,2119-02-20 12:33:00,CONTROLLED,,,,1.0,0.0,2.0,Ventilator
140,21445430,21445430,16833945,27200,199998.0,51464,2119-02-22 09:32:00,NEG,,mg/dL,,1.0,0.0,4.0,Bilirubin
151,21445441,21445441,16833956,27200,199998.0,51506,2119-02-22 09:32:00,Clear,,,,1.0,0.0,4.0,Urine Appearance


In [7]:
# Rows of the selected admission whose CATAGORY equals 0.
df_temp.loc[df_temp["CATAGORY"] == 0]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,ROW_ID,SUBJECT_ID,HADM_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM,FLAG,CATAGORY,VALUENUM_Z-SCORED,TIMESTEP,LABEL
154,21445444,21445444,16833749,27200,199998.0,51514,2119-02-18 19:49:00,4,4.00,mg/dL,abnormal,0.0,8.935176,0.0,Urobilinogen
27,21445295,21445295,16833714,27200,199998.0,50868,2119-02-18 17:58:00,15,15.00,mEq/L,,0.0,0.578508,0.0,Anion Gap
98,21445374,21445374,16833729,27200,199998.0,51248,2119-02-18 17:58:00,33.0,33.00,pg,abnormal,0.0,2.480894,0.0,MCH
92,21445366,21445366,16833728,27200,199998.0,51237,2119-02-18 17:58:00,1.0,1.00,,,0.0,-0.641693,0.0,INR(PT)
89,21445362,21445362,16833727,27200,199998.0,51222,2119-02-18 17:58:00,12.8,12.80,g/dL,abnormal,0.0,-0.670068,0.0,Hemoglobin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,21445340,21445340,16833986,27200,199998.0,51006,2119-02-24 04:00:00,25,25.00,mg/dL,abnormal,0.0,2.870949,6.0,Urea Nitrogen
44,21445312,21445312,16833982,27200,199998.0,50912,2119-02-24 04:00:00,1.1,1.10,mg/dL,,0.0,1.389148,6.0,Creatinine
129,21445416,21445416,16834005,27200,199998.0,51279,2119-02-24 08:15:00,3.04,3.04,m/uL,abnormal,0.0,-4.377918,6.0,Red Blood Cells
51,21445319,21445319,16833983,27200,199998.0,50931,2119-02-24 04:00:00,112,112.00,mg/dL,abnormal,0.0,2.322528,6.0,Glucose


In [8]:
# Rows of the selected admission whose CATAGORY is not 0
# (same filter as the final expression of the cell that builds df_temp).
df_temp.loc[df_temp["CATAGORY"] != 0]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,ROW_ID,SUBJECT_ID,HADM_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM,FLAG,CATAGORY,VALUENUM_Z-SCORED,TIMESTEP,LABEL
49,21445317,21445317,16833719,27200,199998.0,50920,2119-02-18 17:58:00,See Comments,,,,1.0,0.0,0.0,Estimated GFR (MDRD equation)
141,21445431,21445431,16833738,27200,199998.0,51464,2119-02-18 19:49:00,NEG,,mg/dL,,1.0,0.0,0.0,Bilirubin
143,21445433,21445433,16833739,27200,199998.0,51466,2119-02-18 19:49:00,SM,,,,3.0,0.0,0.0,Blood
146,21445436,21445436,16833743,27200,199998.0,51487,2119-02-18 19:49:00,NEG,,,,1.0,0.0,0.0,Nitrite
152,21445442,21445442,16833747,27200,199998.0,51506,2119-02-18 19:49:00,Clear,,,,1.0,0.0,0.0,Urine Appearance
0,21445171,21445171,16833896,27200,199998.0,50800,2119-02-20 23:10:00,ART,,,,1.0,0.0,2.0,SPECIMEN TYPE
9,21445225,21445225,16833813,27200,199998.0,50812,2119-02-20 12:59:00,INTUBATED,,,,1.0,0.0,2.0,Intubated
18,21445286,21445286,16833804,27200,199998.0,50828,2119-02-20 12:33:00,CONTROLLED,,,,1.0,0.0,2.0,Ventilator
140,21445430,21445430,16833945,27200,199998.0,51464,2119-02-22 09:32:00,NEG,,mg/dL,,1.0,0.0,4.0,Bilirubin
151,21445441,21445441,16833956,27200,199998.0,51506,2119-02-22 09:32:00,Clear,,,,1.0,0.0,4.0,Urine Appearance


In [9]:
# Rows of the selected admission recorded at TIMESTEP 0.
df_temp.loc[df_temp["TIMESTEP"] == 0]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,ROW_ID,SUBJECT_ID,HADM_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM,FLAG,CATAGORY,VALUENUM_Z-SCORED,TIMESTEP,LABEL
154,21445444,21445444,16833749,27200,199998.0,51514,2119-02-18 19:49:00,4,4.0,mg/dL,abnormal,0.0,8.935176,0.0,Urobilinogen
27,21445295,21445295,16833714,27200,199998.0,50868,2119-02-18 17:58:00,15,15.0,mEq/L,,0.0,0.578508,0.0,Anion Gap
98,21445374,21445374,16833729,27200,199998.0,51248,2119-02-18 17:58:00,33.0,33.0,pg,abnormal,0.0,2.480894,0.0,MCH
92,21445366,21445366,16833728,27200,199998.0,51237,2119-02-18 17:58:00,1.0,1.0,,,0.0,-0.641693,0.0,INR(PT)
89,21445362,21445362,16833727,27200,199998.0,51222,2119-02-18 17:58:00,12.8,12.8,g/dL,abnormal,0.0,-0.670068,0.0,Hemoglobin
34,21445302,21445302,16833715,27200,199998.0,50882,2119-02-18 17:58:00,27,27.0,mEq/L,,0.0,0.377594,0.0,Bicarbonate
36,21445304,21445304,16833716,27200,199998.0,50893,2119-02-18 17:58:00,10.2,10.2,mg/dL,,0.0,2.954342,0.0,"Calcium, Total"
49,21445317,21445317,16833719,27200,199998.0,50920,2119-02-18 17:58:00,See Comments,,,,1.0,0.0,0.0,Estimated GFR (MDRD equation)
83,21445354,21445354,16833726,27200,199998.0,51221,2119-02-18 17:58:00,37.0,37.0,%,abnormal,0.0,-1.114777,0.0,Hematocrit
42,21445310,21445310,16833717,27200,199998.0,50902,2119-02-18 17:58:00,103,103.0,mEq/L,,0.0,0.019351,0.0,Chloride


In [11]:
# Prescription rows belonging to the selected admission.
df_prescriptions.loc[df_prescriptions["HADM_ID"] == HADM_ID_temp]

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTDATE,ENDDATE,DRUG_TYPE,DRUG,DRUG_NAME_POE,DRUG_NAME_GENERIC,...,GSN,NDC,PROD_STRENGTH,DOSE_VAL_RX,DOSE_UNIT_RX,FORM_VAL_DISP,FORM_UNIT_DISP,ROUTE,ADMITTIME,DISCHTIME
1961425,3189028,27200,199998,,2119-02-18 00:00:00,2119-02-18 00:00:00,MAIN,Hydrochlorothiazide,Hydrochlorothiazide,Hydrochlorothiazide,...,029832,6.033855e+08,25mg Tablet,25,mg,1,TAB,PO,2119-02-18,2119-02-18
1961433,3189027,27200,199998,,2119-02-18 00:00:00,2119-02-20 00:00:00,MAIN,Doxazosin,Doxazosin,Doxazosin,...,015584,9.045523e+08,1mg Tablet,1,mg,1,TAB,PO,2119-02-18,2119-02-20
1961427,3189033,27200,199998,,2119-02-18 00:00:00,2119-02-20 00:00:00,MAIN,Nitroglycerin SL,Nitroglycerin SL,Nitroglycerin SL,...,000474,5.817703e+10,0.3mg SL Tablet Bottle,0.3,mg,1,TAB,SL,2119-02-18,2119-02-20
1961432,3189026,27200,199998,,2119-02-18 00:00:00,2119-02-20 00:00:00,MAIN,Aspirin EC,Aspirin EC,Aspirin EC,...,004381,1.771400e+10,325mg Tab,325,mg,1,TAB,PO,2119-02-18,2119-02-20
1961431,3189025,27200,199998,,2119-02-18 00:00:00,2119-02-20 00:00:00,MAIN,Sodium Chloride 0.9% Flush,Sodium Chloride 0.9% Flush,Sodium Chloride 0.9% Flush,...,,0.000000e+00,Syringe,3,mL,0.6,SYR,IV,2119-02-18,2119-02-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1958140,3190291,27200,199998,,2119-02-23 00:00:00,2119-02-23 00:00:00,MAIN,Metoprolol Tartrate,Metoprolol Tartrate,Metoprolol Tartrate,...,005132,5.107908e+10,50mg Tablet,50,mg,1,TAB,PO,2119-02-23,2119-02-23
1958141,3190293,27200,199998,,2119-02-23 00:00:00,2119-02-24 00:00:00,MAIN,Warfarin,Warfarin,Warfarin,...,014198,5.601698e+07,Check with MD for Dose,1,dose,1,dose,PO,2119-02-23,2119-02-24
1958142,3190292,27200,199998,,2119-02-23 00:00:00,2119-02-24 00:00:00,MAIN,Warfarin,Warfarin,Warfarin,...,014198,5.601698e+07,1mg Tablet,3,mg,3,TAB,PO,2119-02-23,2119-02-24
1958143,3190294,27200,199998,,2119-02-23 00:00:00,2119-02-24 00:00:00,MAIN,Metoprolol Tartrate,Metoprolol Tartrate,Metoprolol Tartrate,...,050631,5.107903e+10,25mg Tablet,75,mg,3,TAB,PO,2119-02-23,2119-02-24


In [15]:
# Procedure rows of the selected admission, with LONG_TITLE attached via a left join on ICD9_CODE.
pd.merge(
    df_procedures_icd.loc[df_procedures_icd["HADM_ID"] == HADM_ID_temp],
    df_d_icd_procedures[["ICD9_CODE", "LONG_TITLE"]],
    how="left",
    on="ICD9_CODE",
)

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE,LONG_TITLE
0,157105,27200,199998,1,3612,(Aorto)coronary bypass of two coronary arteries
1,157106,27200,199998,2,3615,Single internal mammary-coronary artery bypass
2,157107,27200,199998,3,3964,Intraoperative cardiac pacemaker
