In [1]:
from mimic3benchmark.mimic3csv import *
from mimic3benchmark.preprocessing import add_hcup_ccs_2015_groups, make_phenotype_label_matrix
from mimic3benchmark.util import *
import yaml
mimic3_path = "/Users/brian/Downloads/mimic-iii-clinical-database-1.4"
output_path = "/tmp/mimic3"
phenotype_definitions = "../resources/hcup_ccs_2015_definitions.yaml"
verbose = False

In [2]:
patients = read_patients_table(mimic3_path)
admits = read_admissions_table(mimic3_path)
transfers = read_transfers_table(mimic3_path)
stays = read_icustays_table(mimic3_path)

In [3]:
transfers = merge_on_subject_admission(transfers, admits)
transfers = merge_on_subject(transfers, patients)

stays = merge_on_subject_admission(stays, admits)
stays = merge_on_subject(stays, patients)

In [4]:
transfers = add_age_to_icustays(transfers)

In [5]:
transfers = add_inunit_mortality_to_icustays(transfers)
transfers = add_inhospital_mortality_to_icustays(transfers)
transfers = filter_icustays_on_age(transfers)

In [6]:
transfers.to_csv(os.path.join(output_path, 'all_transfers.csv'), index=False)
print ('stransfers_done')
stays.to_csv(os.path.join(output_path, 'all_stays.csv'), index=False)
print ('stays_done')
#====================================================================================

diagnoses = read_icd_diagnoses_table(mimic3_path)
diagnoses = filter_diagnoses_on_stays(diagnoses, stays)
diagnoses.to_csv(os.path.join(output_path, 'all_diagnoses.csv'), index=False)
print ('all_diagnoses_done')
count_icd_codes(diagnoses, output_path=os.path.join(output_path, 'diagnosis_counts.csv'))
print ('diagnosis_counts_done')
#====================================================================================

procedures = read_icd_procedures_table(mimic3_path)
procedures = filter_diagnoses_on_stays(procedures, stays)
procedures.to_csv(os.path.join(output_path, 'all_procedures.csv'), index=False)
print ('all_procedures_done')
count_icd_codes(procedures, output_path=os.path.join(output_path, 'procedures_counts.csv'))
print ('procedures_counts_done')
#----------
prescriptions = read_prescriptions_table(mimic3_path)
prescriptions.to_csv(os.path.join(output_path, 'all_prescriptions.csv'), index=False)
print ('all_prescriptions_done')

stransfers_done
stays_done
all_diagnoses_done
diagnosis_counts_done
all_procedures_done
procedures_counts_done


  exec(code_obj, self.user_global_ns, self.user_ns)


all_prescriptions_done


In [7]:
#====================================================================================
phenotypes = add_hcup_ccs_2015_groups(diagnoses, yaml.load(open(phenotype_definitions, 'r')))

In [9]:
phenotypes.columns

Index(['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'SEQ_NUM', 'ICD9_CODE',
       'SHORT_TITLE', 'LONG_TITLE', 'ICUSTAY_ID', 'HCUP_CCS_2015',
       'USE_IN_BENCHMARK'],
      dtype='object')

In [17]:
phenotypes


Unnamed: 0,ICUSTAY_ID,HCUP_CCS_2015
0,262652,Hypertension with complications and secondary ...
1,262652,Pneumonia (except that caused by tuberculosis ...
3,262652,Chronic kidney disease
5,262652,Fluid and electrolyte disorders
9,262652,Complications of surgical procedures or medica...
...,...,...
687735,250923,Other lower respiratory disease
687764,217928,Other upper respiratory disease
687765,298882,Other upper respiratory disease
687784,287794,Other lower respiratory disease


In [10]:
phenotypes = phenotypes[['ICUSTAY_ID', 'HCUP_CCS_2015']].loc[phenotypes.USE_IN_BENCHMARK > 0].drop_duplicates()


In [11]:
make_phenotype_label_matrix(phenotypes, stays).to_csv(os.path.join(output_path, 'phenotype_labels.csv'),
                                                      index=False, quoting=csv.QUOTE_NONNUMERIC)
#====================================================================================

AttributeError: 'DataFrame' object has no attribute 'USE_IN_BENCHMARK'

In [7]:
subjects = stays.SUBJECT_ID.unique()
break_up_stays_by_subject(stays, output_path, subjects=subjects, verbose=verbose)
break_up_transfers_by_subject(transfers, output_path, subjects=subjects, verbose=verbose)

break_up_diagnoses_by_subject(phenotypes, output_path, subjects=subjects, verbose=verbose)
break_up_procedures_by_subject(procedures, output_path, subjects=subjects, verbose=verbose)

break_up_prescriptions_by_subject(prescriptions, output_path, subjects=subjects, verbose=verbose)

  


KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Int64Index([200017, 200018, 200020, 200023, 200031,\n            ...\n            299949, 299952, 299962, 299963, 299993],\n           dtype='int64', name='ICUSTAY_ID', length=10898). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [None]:
items_to_keep = set(
    [int(itemid) for itemid in dataframe_from_csv(args.itemids_file)['ITEMID'].unique()]) if args.itemids_file else None

for table in args.event_tables:
    read_events_table_and_break_up_by_subject(mimic3_path, table, output_path, items_to_keep=items_to_keep,
                                              subjects_to_keep=subjects, verbose=verbose)

In [None]:
def read_prescriptions_table(mimic3_path):
    prescriptions = dataframe_from_csv(os.path.join(mimic3_path, 'PRESCRIPTIONS.csv'))
    prescriptions.STARTDATE = pd.to_datetime(prescriptions.STARTDATE)
    prescriptions.ENDDATE = pd.to_datetime(prescriptions.ENDDATE)


    prescriptions=prescriptions.loc[prescriptions.ICUSTAY_ID.notnull()]
    prescriptions['ICUSTAY_ID'] = prescriptions['ICUSTAY_ID'].astype(int)
    prescriptions = prescriptions.loc[prescriptions.NDC != 0]

    #prescriptions=prescriptions.ICUSTAY_ID.notnull()&(prescriptions.ndc!=0)

    prescriptions=prescriptions[['SUBJECT_ID','HADM_ID','ICUSTAY_ID','NDC','DOSE_VAL_RX', 'DOSE_UNIT_RX','STARTDATE','ENDDATE']]

    #exclude = ['GSN']
    #prescriptions=prescriptions.loc[:, prescriptions.columns.difference(exclude)].hist()
    #print (prescriptions)
    return prescriptions


In [None]:
read_prescriptions_table(mimic3_path)

In [None]:
dataframe_from_csv(os.path.join(mimic3_path, 'PRESCRIPTIONS.csv'))

In [None]:
values = set(['', 'N/A', 'NA', '-1.#IND', 'NaN', '1.#IND', '<NA>', '#NA',
       '#N/A N/A', '1.#QNAN', 'NULL', 'nan', '-NaN', '-nan', '#N/A',
       'null', '-1.#QNAN', 'n/a'])
values.remove('')

In [None]:
import gzip
with gzip.open(os.path.join(mimic3_path, 'PRESCRIPTIONS.csv'+".gz"), 'rt', encoding="utf-8") as f:
    data = f.readlines()

In [None]:
columns = [d.replace('"','').strip() for d in data[0].split(",")]

In [None]:
columns

In [None]:
pd.read_csv(os.path.join(mimic3_path, 'PRESCRIPTIONS.csv'+".gz"), header=0, index_col=None)#, usecols=columns)

In [None]:
for i in range(1,len(columns)+1):
    print(i)
    tmp=pd.read_csv(os.path.join(mimic3_path, 'PRESCRIPTIONS.csv'+".gz"), usecols=columns[:i])
    print(tmp.shape)
    print("-"*24)

In [None]:
tmp

In [None]:
d2 = [d for d in data if values.intersection(d.split(","))]

In [None]:
d2

In [None]:
for d in d2[:20]:
    print(d)

In [None]:
d3 = [d for d in data if d[1:] ==values]

In [None]:
type(data[0])

In [None]:
len(data[0])

In [None]:
values.intersection('''2214776,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,"MAIN","Tacrolimus","Tacrolimus","Tacrolimus","TACR1","021796","00469061711","1mg Capsule","2","mg","2","CAP","PO"'''.split(","))

In [None]:
'''2214776,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,"MAIN","Tacrolimus","Tacrolimus","Tacrolimus","TACR1","021796","00469061711","1mg Capsule","2","mg","2","CAP","PO"'''.split(",")