In [9]:
import csv
import os

import yaml

from mimic3benchmark.mimic3csv import *
from mimic3benchmark.preprocessing import add_hcup_ccs_2015_groups, make_phenotype_label_matrix
from mimic3benchmark.util import *


# --- Notebook configuration -------------------------------------------------
# Location of the raw MIMIC-III CSV directory. The MIMIC3_PATH environment
# variable overrides the default so the notebook can run on another machine
# without editing this cell.
mimic3_path = os.environ.get("MIMIC3_PATH", "/Users/brian/Downloads/mimic-iii-clinical-database-1.4")
# Directory that receives every CSV written by the cells below; override with
# MIMIC3_OUTPUT_PATH.
output_path = os.environ.get("MIMIC3_OUTPUT_PATH", "/tmp/mimic3")
# makedirs(exist_ok=True) creates missing parent directories and does not fail
# if the directory already exists (or appears between a check and the create).
os.makedirs(output_path, exist_ok=True)
# YAML file with the phenotype group definitions, relative to the notebook's
# working directory.
phenotype_definitions = "../resources/hcup_ccs_2015_definitions.yaml"
# Passed through to the break-up / event-reading helpers further down.
verbose = False

In [10]:
# Load the four source tables from the directory at `mimic3_path`.
# The read_* helpers are not imported by name in this notebook; presumably they
# come from the wildcard imports in the first cell — confirm if that changes.
patients = read_patients_table(mimic3_path)
admits = read_admissions_table(mimic3_path)
transfers = read_transfers_table(mimic3_path)
stays = read_icustays_table(mimic3_path)

In [11]:
# Enrich both the transfers and the ICU-stay frames in the same way: first
# merge in the admissions table, then merge in the patients table.
transfers = merge_on_subject(
    merge_on_subject_admission(transfers, admits),
    patients,
)

stays = merge_on_subject(
    merge_on_subject_admission(stays, admits),
    patients,
)

In [12]:
# Run the transfers frame through add_age_to_icustays and keep the result.
# NOTE(review): only `transfers` is passed through this helper; `stays` is never
# given the same treatment in the visible cells — confirm that is intended.
transfers = add_age_to_icustays(transfers)

In [13]:
# Apply, in order, the in-unit mortality helper, the in-hospital mortality
# helper, and the age filter to the transfers frame; each step replaces
# `transfers` with the helper's return value.
# NOTE(review): as with the age step, `stays` is not passed through any of
# these helpers in the visible cells, so it is written out unfiltered below —
# confirm that is intended.
transfers = add_inunit_mortality_to_icustays(transfers)
transfers = add_inhospital_mortality_to_icustays(transfers)
transfers = filter_icustays_on_age(transfers)

In [14]:
def _dump_csv(frame, filename, done_marker):
    """Write `frame` to `filename` under output_path (no index column), then print `done_marker`."""
    frame.to_csv(os.path.join(output_path, filename), index=False)
    print(done_marker)


# --- Transfers and ICU stays ------------------------------------------------
_dump_csv(transfers, 'all_transfers.csv', 'transfers_done')
_dump_csv(stays, 'all_stays.csv', 'stays_done')

# --- ICD diagnoses: read, restrict to the stays frame, save, count codes -----
diagnoses = read_icd_diagnoses_table(mimic3_path)
diagnoses = filter_diagnoses_on_stays(diagnoses, stays)
_dump_csv(diagnoses, 'all_diagnoses.csv', 'all_diagnoses_done')
diagnosis_counts_csv = os.path.join(output_path, 'diagnosis_counts.csv')
count_icd_codes(diagnoses, output_path=diagnosis_counts_csv)
print('diagnosis_counts_done')

# --- ICD procedures: same steps, reusing the diagnoses filter/count helpers --
procedures = read_icd_procedures_table(mimic3_path)
procedures = filter_diagnoses_on_stays(procedures, stays)
_dump_csv(procedures, 'all_procedures.csv', 'all_procedures_done')
procedures_counts_csv = os.path.join(output_path, 'procedures_counts.csv')
count_icd_codes(procedures, output_path=procedures_counts_csv)
print('procedures_counts_done')

# --- Prescriptions: saved as read, with no filtering against stays -----------
prescriptions = read_prescriptions_table(mimic3_path)
_dump_csv(prescriptions, 'all_prescriptions.csv', 'all_prescriptions_done')

transfers_done
stays_done
all_diagnoses_done
diagnosis_counts_done
all_procedures_done
procedures_counts_done


  exec(code_obj, self.user_global_ns, self.user_ns)


all_prescriptions_done


In [15]:
#====================================================================================
# Parse the phenotype definitions YAML and use it to annotate the diagnoses.
# - `with` closes the file handle deterministically instead of leaking it.
# - yaml.safe_load replaces yaml.load(stream): calling yaml.load without a
#   Loader is deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6,
#   and safe_load only constructs plain Python data, which is all a
#   definitions file should contain.
with open(phenotype_definitions, 'r') as definitions_file:
    phenotype_groups = yaml.safe_load(definitions_file)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_groups)

In [16]:
#phenotypes = phenotypes[['ICUSTAY_ID', 'HCUP_CCS_2015']].loc[phenotypes.USE_IN_BENCHMARK > 0].drop_duplicates()


In [17]:
# Build the phenotype label matrix from the annotated diagnoses and the stays
# frame, then write it to phenotype_labels.csv without the index column.
# QUOTE_NONNUMERIC makes the CSV writer quote every non-numeric field.
# `csv` is imported explicitly in the first cell; previously this line only
# worked if the name happened to leak in through a wildcard import.
phenotype_labels = make_phenotype_label_matrix(phenotypes, stays)
phenotype_labels.to_csv(os.path.join(output_path, 'phenotype_labels.csv'),
                        index=False, quoting=csv.QUOTE_NONNUMERIC)
#====================================================================================

In [18]:
# Every per-subject split below is restricted to the subject ids that occur in
# the stays frame and shares the same verbosity flag.
subjects = stays.SUBJECT_ID.unique()
per_subject_options = {'subjects': subjects, 'verbose': verbose}

# Stays and transfers.
break_up_stays_by_subject(stays, output_path, **per_subject_options)
break_up_transfers_by_subject(transfers, output_path, **per_subject_options)

# Diagnoses (the phenotype-annotated frame) and procedures.
break_up_diagnoses_by_subject(phenotypes, output_path, **per_subject_options)
break_up_procedures_by_subject(procedures, output_path, **per_subject_options)

# Prescriptions.
break_up_prescriptions_by_subject(prescriptions, output_path, **per_subject_options)

In [None]:
# This cell originally read `args.itemids_file` and `args.event_tables`, but no
# visible cell defines `args` (it looks like an argparse namespace carried over
# from a command-line script), so a fresh kernel would raise NameError here.
# Use `args` when it exists; otherwise fall back to notebook-level defaults.
try:
    itemids_file = args.itemids_file
    event_tables = args.event_tables
except NameError:
    # Optional CSV with an ITEMID column; None keeps every item.
    itemids_file = None
    # NOTE(review): assumed default set of event tables — adjust to the tables
    # actually present under `mimic3_path`.
    event_tables = ['CHARTEVENTS', 'LABEVENTS', 'OUTPUTEVENTS']

# Set of integer ITEMIDs to keep, or None when no item-id file is configured.
items_to_keep = (
    {int(itemid) for itemid in dataframe_from_csv(itemids_file)['ITEMID'].unique()}
    if itemids_file else None
)

# Process each event table in turn, restricted to the subjects selected from
# the stays frame (`subjects` is defined in the per-subject break-up cell).
for table in event_tables:
    read_events_table_and_break_up_by_subject(mimic3_path, table, output_path, items_to_keep=items_to_keep,
                                              subjects_to_keep=subjects, verbose=verbose)