In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import icd10
from icd9cms.icd9 import search

from collections import Counter

import sys
sys.path.insert(1, os.path.join(os.path.abspath('.'), '..'))

from utils.data_io import *
from utils.common import *

def legend_without_duplicate_labels(ax, fontsize=12):
    """Draw a legend on ``ax`` in which every label appears only once.

    When several artists share a label, only the first handle carrying that
    label is kept, in the order returned by ``ax.get_legend_handles_labels()``.

    Parameters
    ----------
    ax : object
        Axes-like object exposing ``get_legend_handles_labels()`` and
        ``legend()``.
    fontsize : int, optional
        Forwarded to ``ax.legend`` as ``fontsize`` (default 12).
    """
    handles, labels = ax.get_legend_handles_labels()

    # Track labels already emitted in a set so each membership test is
    # constant-time instead of rescanning a growing slice of `labels`.
    seen = set()
    unique = []
    for handle, label in zip(handles, labels):
        if label not in seen:
            seen.add(label)
            unique.append((handle, label))

    # zip(*unique) transposes the pairs into (handles, labels). With no
    # labelled artists it yields nothing, so ax.legend is then called with
    # only the `fontsize` keyword.
    ax.legend(*zip(*unique), fontsize=fontsize)

In [2]:
# Tab-separated dictionary tables; each is read for its `uid` and `label` columns.
tables = [
    '/home/kai/workspace/data/MIMIC/MIMIC4/d_v1-i_v1.tsv',
    '/home/kai/workspace/data/MIMIC/MIMIC4/d_v1-d_chartitems.tsv',
    '/home/kai/workspace/data/MIMIC/MIMIC4/d_v1-d_derived.tsv',
    '/home/kai/workspace/data/MIMIC/MIMIC4/d_v1-d_labitems.tsv',
]

# Merge the uid -> label pairs of every table into a single lookup. When a uid
# occurs in more than one table, the table listed later overwrites the earlier
# entry.
uid_dict = {}

for tab_path in tables:
    tab = pd.read_csv(tab_path, sep='\t', header=0)
    labels_by_uid = pd.Series(tab.label.values, index=tab.uid)
    uid_dict.update(labels_by_uid.to_dict())


In [3]:
# Read the uids listed in the d_v1 table and look up the label of each one.
# uid_dict is indexed directly, so a uid without a label raises KeyError.
selected_table = pd.read_csv(
    '/home/kai/workspace/data/MIMIC/MIMIC4/d_v1-d_v1.tsv', sep='\t', header=0
)
uid_selected = selected_table.uid.values
uid_selected_dict = {key: uid_dict[key] for key in uid_selected}

uid_selected_dict

{0: 'subject_id',
 1: 'hadm_id',
 2: 'stay_id',
 3: 'gender',
 4: 'dod',
 5: 'admittime',
 6: 'dischtime',
 7: 'los_hospital',
 8: 'admission_age',
 9: 'ethnicity',
 10: 'hospital_expire_flag',
 11: 'hospstay_seq',
 12: 'first_hosp_stay',
 13: 'icu_intime',
 14: 'icu_outtime',
 15: 'los_icu',
 16: 'icustay_seq',
 17: 'first_icu_stay',
 18: 'first_careunit',
 19: 'last_careunit',
 20: 'age_score',
 21: 'myocardial_infarct',
 22: 'congestive_heart_failure',
 23: 'peripheral_vascular_disease',
 24: 'cerebrovascular_disease',
 25: 'dementia',
 26: 'chronic_pulmonary_disease',
 27: 'rheumatic_disease',
 28: 'peptic_ulcer_disease',
 29: 'mild_liver_disease',
 30: 'diabetes_without_cc',
 31: 'diabetes_with_cc',
 32: 'paraplegia',
 33: 'renal_disease',
 34: 'malignant_cancer',
 35: 'severe_liver_disease',
 36: 'metastatic_solid_tumor',
 37: 'aids',
 38: 'charlson_comorbidity_index',
 39: 'icd',
 40: 'scr_min',
 41: 'ckd',
 42: 'mdrd_est',
 43: 'scr_baseline',
 100001: 'height',
 100002: 'weigh

In [4]:
# Build the paths of the two per-patient files ('data_<pid>.dsv' and
# 'info_<pid>.dsv') for one example patient id.
mimic_data_path = '/home/kai/workspace/data/MIMIC/MIMIC4/structured_dsv_data_v1'
pid = 30000646
data_file = os.path.join(mimic_data_path, f'data_{pid}.dsv')
info_file = os.path.join(mimic_data_path, f'info_{pid}.dsv')

In [5]:
# Both per-patient files are '$'-separated, with column names on the first line.
dsv_options = {'header': 0, 'sep': '$'}
data = pd.read_csv(data_file, **dsv_options)
info = pd.read_csv(info_file, **dsv_options)

In [6]:
# Display the table read from data_file (pandas elides the middle rows of a long frame).
data

Unnamed: 0,uid,value,unit,rate,rate_unit,lower_range,upper_range,category,specimen_id,starttime,endtime
0,100031,,,,,,,KDIGO,,2194-04-27 14:25:00,
1,100032,,,,,,,KDIGO,,2194-04-27 14:25:00,
2,100033,0.0,,,,,,KDIGO,,2194-04-27 14:25:00,
3,100034,,,,,,,KDIGO,,2194-04-27 14:25:00,
4,100035,0.0,,,,,,KDIGO,,2194-04-27 14:25:00,
...,...,...,...,...,...,...,...,...,...,...,...
3367,224642,,,,,,,Routine Vital Signs,,2194-05-03 18:00:00,
3368,224650,,,,,,,Routine Vital Signs,,2194-05-03 18:00:00,
3369,224651,,,,,,,Routine Vital Signs,,2194-05-03 18:00:00,
3370,226479,,,,,,,Routine Vital Signs,,2194-05-03 18:00:00,


In [19]:
# Keep only the rows in category 'KDIGO' for uid 100035 whose value is not null.
data.loc[
    data['category'].eq('KDIGO') & data['value'].notna() & data['uid'].eq(100035)
]

Unnamed: 0,uid,value,unit,rate,rate_unit,lower_range,upper_range,category,specimen_id,starttime,endtime
4,100035,0.0,,,,,,KDIGO,,2194-04-27 14:25:00,
9,100035,0.0,,,,,,KDIGO,,2194-04-28 06:30:00,
70,100035,0.0,,,,,,KDIGO,,2194-04-29 02:33:00,
183,100035,0.0,,,,,,KDIGO,,2194-04-29 05:54:00,
253,100035,0.0,,,,,,KDIGO,,2194-04-29 06:09:00,
597,100035,0.0,,,,,,KDIGO,,2194-04-29 08:00:00,
781,100035,0.0,,,,,,KDIGO,,2194-04-29 15:00:00,
902,100035,0.0,,,,,,KDIGO,,2194-04-29 19:00:00,
950,100035,0.0,,,,,,KDIGO,,2194-04-29 19:33:00,
1139,100035,0.0,,,,,,KDIGO,,2194-04-29 23:00:00,


In [23]:
# First 50 rows recorded for uid 220045.
data.loc[data['uid'].eq(220045)].head(50)

Unnamed: 0,uid,value,unit,rate,rate_unit,lower_range,upper_range,category,specimen_id,starttime,endtime
17,220045,100.0,bpm,,,,,Routine Vital Signs,,2194-04-29 01:39:00,
34,220045,100.0,bpm,,,,,Routine Vital Signs,,2194-04-29 01:41:00,
45,220045,102.0,bpm,,,,,Routine Vital Signs,,2194-04-29 02:00:00,
71,220045,,bpm,,,,,Routine Vital Signs,,2194-04-29 02:33:00,
120,220045,97.0,bpm,,,,,Routine Vital Signs,,2194-04-29 03:00:00,
136,220045,93.0,bpm,,,,,Routine Vital Signs,,2194-04-29 04:00:00,
157,220045,87.0,bpm,,,,,Routine Vital Signs,,2194-04-29 05:00:00,
189,220045,89.0,bpm,,,,,Routine Vital Signs,,2194-04-29 05:55:00,
205,220045,90.0,bpm,,,,,Routine Vital Signs,,2194-04-29 06:00:00,
222,220045,87.0,bpm,,,,,Routine Vital Signs,,2194-04-29 06:03:00,


In [24]:
# Gap between consecutive `starttime` stamps of uid 220045 (first 50 rows);
# the first entry is NaT because it has no preceding row to subtract.
(
    data.loc[data['uid'].eq(220045), 'starttime']
    .pipe(pd.to_datetime)
    .diff()
    .head(50)
)

17                NaT
34    0 days 00:02:00
45    0 days 00:19:00
71    0 days 00:33:00
120   0 days 00:27:00
136   0 days 01:00:00
157   0 days 01:00:00
189   0 days 00:55:00
205   0 days 00:05:00
222   0 days 00:03:00
233   0 days 00:02:00
254   0 days 00:04:00
323   0 days 00:01:00
346   0 days 00:05:00
357   0 days 00:05:00
368   0 days 00:01:00
379   0 days 00:04:00
390   0 days 00:05:00
401   0 days 00:04:00
412   0 days 00:01:00
423   0 days 00:02:00
434   0 days 00:03:00
445   0 days 00:03:00
456   0 days 00:02:00
467   0 days 00:05:00
478   0 days 00:05:00
495   0 days 00:05:00
523   0 days 00:05:00
534   0 days 00:05:00
545   0 days 00:05:00
556   0 days 00:05:00
567   0 days 00:05:00
598   0 days 00:35:00
616   0 days 01:00:00
633   0 days 00:15:00
644   0 days 00:45:00
686   0 days 00:02:00
697   0 days 00:58:00
718   0 days 01:00:00
735   0 days 01:00:00
751   0 days 01:00:00
782   0 days 01:00:00
803   0 days 01:00:00
821   0 days 01:00:00
838   0 days 01:00:00
854   0 da