# MIMIC-III tables to FHIR resource mapping in this notebook [NOT READY]

||Original format | FHIR resource| Progress|Final Check|
|------|:-----|:-----|:-----|:---:|
|12|caregivers | practitioner| C,L,A|Done|
|13|procedures_icd | procedure| C,L,A|Done|
|14|procedureevents_mv | procedure| C,L,A|Done|
|15|microbiologyevents | specimen| C,L,A|Done|
|16|outputevents | specimen| C,L,A|Done|
|17|services | serviceRequest|C,L,A|Done|
|18|callout | -|-|-|
|19|transfers | -|-|-|
|20|drgcodes | -|-|-|

In [1]:
import numpy as np
import pandas as pd
import os
import gc

In [2]:
# Directory holding the MIMIC-III source files. File names are appended by plain
# string concatenation, so the trailing slash is required.
data_path = './data/mimic-iii-clinical-database-1.4/'
# Directory the converted tables are written to (trailing slash required as well).
output_path = './data/fhir_out/'
# Extension and compression of the input files; read as globals by the
# transform_* functions in this notebook.
file_ext = '.csv.gz'
compression = 'gzip'

# Names of the entries present in the input directory.
data_files = os.listdir(data_path)

## fhir.practitioner table

#### MAPPING:

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.caregivers.CGID|fhir.practitioner.identifier|
|2|mimic.caregivers.LABEL|fhir.practitioner.qualification_label|
|3|mimic.caregivers.DESCRIPTION|fhir.practitioner.qualification_category|

In [None]:
def transform_caregivers(data_path, output_path):
    """Convert the MIMIC-III CAREGIVERS table into the practitioner table.

    The input file name is built as ``data_path + 'CAREGIVERS' + file_ext`` and
    read with the module-level ``compression`` setting. ``ROW_ID`` is removed,
    the remaining MIMIC columns are renamed, and the result is written to
    ``practitioner.csv.gz`` (gzip) under ``output_path``.

    Args:
        data_path: Directory prefix of the source files (must end with a
            path separator, because it is concatenated as a string).
        output_path: Directory prefix for the converted file (same rule).

    Returns:
        The converted DataFrame.
    """
    column_map = {
        'CGID': 'identifier',
        'LABEL': 'qualification_label',
        'DESCRIPTION': 'qualification_category',
    }
    source_file = data_path + 'CAREGIVERS' + file_ext
    target_file = output_path + 'practitioner.csv.gz'

    raw = pd.read_csv(source_file, compression=compression)
    # ROW_ID is not part of the output table.
    practitioner_df = raw.drop(columns=['ROW_ID']).rename(columns=column_map)

    practitioner_df.to_csv(target_file, compression='gzip', index=False)
    return practitioner_df

# Run the caregivers conversion and preview the first rows of the result.
practitioner = transform_caregivers(data_path, output_path)
practitioner.head()

## fhir.procedure table

#### PROCEDURES_ICD MAPPING:<br>
||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.procedures_icd.ROW_ID | fhir.procedure.identifier|
|2|mimic.procedures_icd.SUBJECT_ID | fhir.procedure.subject|
|3|mimic.procedures_icd.HADM_ID | fhir.procedure.encounter|
|4|mimic.procedures_icd.ICD9_CODE | fhir.procedure.code|

#### PROCEDUREEVENTS_MV MAPPING:<br>
||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.procedureevents_mv.ROW_ID | fhir.procedure.identifier|
|2|mimic.procedureevents_mv.SUBJECT_ID | fhir.procedure.subject|
|3|mimic.procedureevents_mv.HADM_ID | fhir.procedure.encounter|
|4|mimic.procedureevents_mv.ICUSTAY_ID| fhir.procedure.partOf|
|5|mimic.procedureevents_mv.STARTTIME| fhir.procedure.performedRange_start|
|6|mimic.procedureevents_mv.ENDTIME| fhir.procedure.performedRange_end|
|7|mimic.procedureevents_mv.ITEMID| fhir.procedure.code|
|8|mimic.procedureevents_mv.VALUE| fhir.procedure.outcome_value|
|9|mimic.procedureevents_mv.VALUEUOM | fhir.procedure.outcome_unit|
|10|mimic.procedureevents_mv.LOCATION| fhir.procedure.location_name|
|11|mimic.procedureevents_mv.LOCATIONCATEGORY| fhir.procedure.location_category|
|12|mimic.procedureevents_mv.CGID|fhir.procedure.performer|
|13|mimic.procedureevents_mv.ORDERID| fhir.procedure.basedOn|
|14|mimic.procedureevents_mv.LINKORDERID|fhir.procedure.basedOn_linked|
|15|mimic.procedureevents_mv.ORDERCATEGORYNAME| fhir.procedure.category_order_name|
|16|mimic.procedureevents_mv.SECONDARYORDERCATEGORYNAME|fhir.procedure.category_secOrder_name|
|17|mimic.procedureevents_mv.ORDERCATEGORYDESCRIPTION|fhir.procedure.category_order_description|
|18|mimic.procedureevents_mv.ISOPENBAG| fhir.procedure.usedReference_openBag|
|19|mimic.procedureevents_mv.CONTINUEINNEXTDEPT| fhir.procedure.report_contNextDep|
|20|mimic.procedureevents_mv.CANCELREASON| fhir.procedure.report_cancelReason|
|21|mimic.procedureevents_mv.STATUSDESCRIPTION| fhir.procedure.status|
|22|mimic.procedureevents_mv.COMMENTS_EDITEDBY| fhir.procedure.report_editedBy|
|23|mimic.procedureevents_mv.COMMENTS_CANCELEDBY| fhir.procedure.report_canceledBy|
|24|mimic.procedureevents_mv.COMMENTS_DATE| fhir.procedure.report_canceledDate|
|25|mimic.d_items.(LABEL+DBSOURCE+PARAM_TYPE)|fhir.procedure.note|
|26|mimic.d_items.CATEGORY|fhir.procedure.category|

In [None]:
def transform_procedures_icd(data_path, output_path):
    """Convert the MIMIC-III PROCEDURES_ICD table into the ICD-9 procedure table.

    Reads ``data_path + 'PROCEDURES_ICD' + file_ext`` (``file_ext`` and
    ``compression`` are module-level settings), adds a ``followUp`` column,
    renames the MIMIC columns and writes the result to
    ``procedure_icd9.csv.gz`` (gzip) under ``output_path``.

    ``ICD9_CODE`` is read as text. Anyone reading the written CSV back must
    also request a string dtype for ``code``, otherwise leading zeros are lost
    again on load.

    Args:
        data_path: Directory prefix of the source files (must end with a
            path separator, because it is concatenated as a string).
        output_path: Directory prefix for the converted file (same rule).

    Returns:
        The converted DataFrame.
    """
    # Codes are identifiers, not numbers: without an explicit string dtype
    # pandas infers an integer column for all-digit codes and strips their
    # leading zeros.
    procedures_icd = pd.read_csv(data_path+'PROCEDURES_ICD'+file_ext,
                                 compression=compression,
                                 dtype={'ICD9_CODE': str})

    # followUp is the ROW_ID of the next row of the same admission, NaN for the
    # last row of each admission.
    # NOTE(review): "next" means next in file order; the rows are not sorted
    # here (e.g. by SEQ_NUM) - confirm the file order is the intended order.
    procedures_icd['followUp'] = procedures_icd.groupby('HADM_ID')['ROW_ID'].shift(-1)

    procedures_icd = procedures_icd.rename(columns={'ROW_ID': 'identifier',
                                                    'SUBJECT_ID': 'subject',
                                                    'HADM_ID': 'encounter',
                                                    'ICD9_CODE': 'code'})

    procedures_icd.to_csv(output_path+'procedure_icd9.csv.gz', compression='gzip', index=False)
    return procedures_icd

# Run the PROCEDURES_ICD conversion and preview the first rows of the result.
procedure_icd9 = transform_procedures_icd(data_path, output_path)
procedure_icd9.head()

In [None]:
def transform_procedurevents_mv(data_path, output_path):
    """Convert the MIMIC-III PROCEDUREEVENTS_MV table into the procedure table.

    Reads PROCEDUREEVENTS_MV and D_ITEMS from ``data_path`` (``file_ext`` and
    ``compression`` are module-level settings), joins them on ``ITEMID``,
    parses the time columns, builds a ``note`` column from the D_ITEMS
    ``LABEL``, ``DBSOURCE`` and ``PARAM_TYPE`` values, renames the MIMIC
    columns and writes the result to ``procedure_mv.csv.gz`` (gzip) under
    ``output_path``.

    The join is pandas' default inner join, so events whose ``ITEMID`` has no
    entry in D_ITEMS are not part of the output.

    Args:
        data_path: Directory prefix of the source files (must end with a
            path separator, because it is concatenated as a string).
        output_path: Directory prefix for the converted file (same rule).

    Returns:
        The converted DataFrame.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    procedurevents_mv = pd.read_csv(data_path+'PROCEDUREEVENTS_MV'+file_ext, compression=compression)
    # ROW_ID of D_ITEMS becomes the index so it does not collide with the
    # ROW_ID column of the events table in the merge.
    d_items = pd.read_csv(data_path+'D_ITEMS'+file_ext, compression=compression, index_col=0)

    procedurevents_mv = pd.merge(procedurevents_mv, d_items, on='ITEMID')

    # Values that do not match the format become NaT instead of raising.
    for column in ('STARTTIME', 'ENDTIME'):
        procedurevents_mv[column] = pd.to_datetime(procedurevents_mv[column],
                                                   format=timestamp_format,
                                                   errors='coerce')
    # No fixed format here: with a date-only format and errors='coerce', every
    # value that carries a time component would silently be turned into NaT.
    procedurevents_mv['COMMENTS_DATE'] = pd.to_datetime(procedurevents_mv['COMMENTS_DATE'],
                                                        errors='coerce')

    # String concatenation with NaN yields NaN, so blank out a missing
    # PARAM_TYPE first (same treatment as in the specimen conversions).
    procedurevents_mv['PARAM_TYPE'] = procedurevents_mv['PARAM_TYPE'].fillna('')
    procedurevents_mv['note'] = (procedurevents_mv['LABEL'] + ' '
                                 + procedurevents_mv['DBSOURCE'] + ' '
                                 + procedurevents_mv['PARAM_TYPE'])

    # Drop the columns merged into note and those not carried over.
    procedurevents_mv = procedurevents_mv.drop(
        columns=['LABEL', 'PARAM_TYPE', 'STORETIME', 'ABBREVIATION',
                 'DBSOURCE', 'LINKSTO', 'CONCEPTID', 'UNITNAME'])

    procedurevents_mv = procedurevents_mv.rename(columns={
        'ROW_ID': 'identifier',
        'SUBJECT_ID': 'subject',
        'HADM_ID': 'encounter',
        'ICUSTAY_ID': 'partOf',
        'STARTTIME': 'performedRange_start',
        'ENDTIME': 'performedRange_end',
        'ITEMID': 'code',
        'VALUE': 'outcome_value',
        'VALUEUOM': 'outcome_unit',
        'LOCATION': 'location_name',
        'LOCATIONCATEGORY': 'location_category',
        'CGID': 'performer',
        'ORDERID': 'basedOn',
        'LINKORDERID': 'basedOn_linked',
        'ORDERCATEGORYNAME': 'category_order_name',
        'SECONDARYORDERCATEGORYNAME': 'category_secOrder_name',
        'ORDERCATEGORYDESCRIPTION': 'category_order_description',
        'ISOPENBAG': 'usedReference_openBag',
        'CONTINUEINNEXTDEPT': 'report_contNextDep',
        'CANCELREASON': 'report_cancelReason',
        'STATUSDESCRIPTION': 'status',
        'COMMENTS_EDITEDBY': 'report_editedBy',
        'COMMENTS_CANCELEDBY': 'report_canceledBy',
        'COMMENTS_DATE': 'report_canceledDate',
        'CATEGORY': 'category',
    })

    procedurevents_mv.to_csv(output_path+'procedure_mv.csv.gz', compression='gzip', index=False)
    return procedurevents_mv

# Run the PROCEDUREEVENTS_MV conversion and preview the first rows of the result.
procedure_mv = transform_procedurevents_mv(data_path, output_path)
procedure_mv.head()

## fhir.specimen table

#### OUTPUTEVENTS MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.outputevents.ROW_ID | fhir.specimen.identifier|
|2|mimic.outputevents.SUBJECT_ID | fhir.specimen.subject|
|3|mimic.outputevents.HADM_ID | fhir.specimen.request_encounter_admission|
|4|mimic.outputevents.ICUSTAY_ID | fhir.specimen.request_encounter_icustay|
|5|mimic.outputevents.ITEMID | fhir.specimen.type_code|
|6|mimic.d_items.CATEGORY | fhir.specimen.type_category|
|7|mimic.outputevents.CGID | fhir.specimen.collection_collector|
|8|mimic.outputevents.CHARTTIME | fhir.specimen.collection_dateTime|
|9|mimic.outputevents.VALUE | fhir.specimen.collection_quantity|
|10|mimic.outputevents.VALUEUOM | fhir.specimen.collection_unit|
|11|mimic.outputevents.NEWBOTTLE | fhir.specimen.collection_newBottle|
|12|mimic.outputevents.(STOPPED+ISERROR) | fhir.specimen.status|
|13|mimic.d_items.(LABEL+DBSOURCE+PARAM_TYPE) | fhir.specimen.note|

#### MICROBIOLOGYEVENTS MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.microbiologyevents.ROW_ID| fhir.specimen.identifier|
|2|mimic.microbiologyevents.SUBJECT_ID | fhir.specimen.subject|
|3|mimic.microbiologyevents.HADM_ID | fhir.specimen.request_encounter_admission|
|4|mimic.microbiologyevents.CHARTTIME | fhir.specimen.collection_dateTime|
|5|mimic.microbiologyevents.SPEC_ITEMID | fhir.specimen.type_code|
|6|mimic.microbiologyevents.SPEC_TYPE_DESC | fhir.specimen.type_name|
|7|mimic.d_items(on SPEC).CATEGORY | fhir.specimen.type_category|
|8|mimic.microbiologyevents.ORG_ITEMID | fhir.specimen.method_bact_code|
|9|mimic.microbiologyevents.ORG_NAME | fhir.specimen.method_bact_name|
|10|mimic.microbiologyevents.ISOLATE_NUM | fhir.specimen.method_colNum|
|11|mimic.microbiologyevents.AB_ITEMID | fhir.specimen.method_antibiotic_code|
|12|mimic.microbiologyevents.AB_NAME | fhir.specimen.method_antibiotic_name|
|13|mimic.microbiologyevents.DILUTION_TEXT | fhir.specimen.method_dilution_description|
|14|mimic.microbiologyevents.DILUTION_COMPARISON | fhir.specimen.method_dilution_comp|
|15|mimic.microbiologyevents.DILUTION_VALUE | fhir.specimen.method_dilution_value|
|16|mimic.microbiologyevents.INTERPRETATION | fhir.specimen.note_interpretation|
|17|mimic.d_items_(SPEC/ORG/AB).(LABEL+PARAM_TYPE+DBSOURCE) | fhir.specimen.note|

In [None]:
def transform_outputevents(data_path, output_path):
    """Convert the MIMIC-III OUTPUTEVENTS table into the specimen table.

    Reads OUTPUTEVENTS and D_ITEMS from ``data_path`` (``file_ext`` and
    ``compression`` are module-level settings), joins them on ``ITEMID``,
    parses ``CHARTTIME``, builds a ``note`` column from the D_ITEMS ``LABEL``,
    ``DBSOURCE`` and ``PARAM_TYPE`` values, folds ``ISERROR`` into the status
    column, renames the MIMIC columns and writes the selected columns to
    ``specimen_oe.csv.gz`` (gzip) under ``output_path``.

    The join is pandas' default inner join, so events whose ``ITEMID`` has no
    entry in D_ITEMS are not part of the output.

    Args:
        data_path: Directory prefix of the source files (must end with a
            path separator, because it is concatenated as a string).
        output_path: Directory prefix for the converted file (same rule).

    Returns:
        The converted DataFrame, restricted to the output columns.
    """
    outputevents = pd.read_csv(data_path+'OUTPUTEVENTS'+file_ext, compression=compression)
    # ROW_ID of D_ITEMS becomes the index so it does not collide with the
    # ROW_ID column of the events table in the merge.
    d_items = pd.read_csv(data_path+'D_ITEMS'+file_ext, compression=compression, index_col=0)

    specimen_oe = pd.merge(outputevents, d_items, on='ITEMID')
    # Values that do not match the format become NaT instead of raising.
    specimen_oe['CHARTTIME'] = pd.to_datetime(specimen_oe['CHARTTIME'],
                                              format='%Y-%m-%d %H:%M:%S',
                                              errors='coerce')

    # String concatenation with NaN yields NaN, so blank out a missing
    # PARAM_TYPE first. The result is assigned back to the frame: an inplace
    # replace on a selected column does not update the frame under pandas
    # copy-on-write, and np.NaN no longer exists in NumPy 2.
    specimen_oe['PARAM_TYPE'] = specimen_oe['PARAM_TYPE'].fillna('')
    specimen_oe['note'] = (specimen_oe['LABEL'] + ' '
                           + specimen_oe['DBSOURCE'] + ' '
                           + specimen_oe['PARAM_TYPE'])

    # Combine STOPPED and ISERROR: rows flagged as erroneous get the status
    # 'Error'. STOPPED is cast to object first because it is read as a float
    # column when it contains no values, and writing a string into a numeric
    # column is an incompatible-dtype assignment in recent pandas.
    specimen_oe['STOPPED'] = specimen_oe['STOPPED'].astype(object)
    specimen_oe.loc[specimen_oe['ISERROR'] == 1, 'STOPPED'] = 'Error'

    # The target names must match the column selection below; a name that is
    # only present in the selection would come out as an all-NaN column.
    specimen_oe = specimen_oe.rename(columns={
        'ROW_ID': 'identifier',
        'SUBJECT_ID': 'subject',
        'HADM_ID': 'request_encounter_admission',
        'ICUSTAY_ID': 'request_encounter_icustay',
        'ITEMID': 'type_code',
        'CATEGORY': 'type_category',
        'CGID': 'collection_collector',
        'CHARTTIME': 'collection_dateTime',
        'VALUE': 'collection_quantity',
        'VALUEUOM': 'collection_unit',
        'NEWBOTTLE': 'collection_newBottle',
        'STOPPED': 'status',
    })

    # Keep only the output columns, in their final order; every other column
    # of the merged frame is dropped by this selection.
    specimen_oe = specimen_oe.reindex(columns=['identifier',
                                               'subject',
                                               'request_encounter_admission',
                                               'request_encounter_icustay',
                                               'type_code',
                                               'type_category',
                                               'collection_collector',
                                               'collection_dateTime',
                                               'collection_quantity',
                                               'collection_unit',
                                               'collection_newBottle',
                                               'status',
                                               'note'])

    specimen_oe.to_csv(output_path+'specimen_oe.csv.gz', compression='gzip', index=False)
    return specimen_oe

# Run the OUTPUTEVENTS conversion and preview the first rows of the result.
specimen_oe = transform_outputevents(data_path, output_path)
specimen_oe.head()

In [None]:
def transform_microbiologyevents(data_path, output_path):
    """Convert the MIMIC-III MICROBIOLOGYEVENTS table into the specimen table.

    Reads MICROBIOLOGYEVENTS and D_ITEMS from ``data_path`` (``file_ext`` and
    ``compression`` are module-level settings) and looks up the D_ITEMS entry
    for each of ``SPEC_ITEMID``, ``ORG_ITEMID`` and ``AB_ITEMID``. It then
    parses ``CHARTTIME``, builds a ``note`` column from the ``LABEL``,
    ``DBSOURCE`` and ``PARAM_TYPE`` values of the three lookups, renames the
    MIMIC columns and writes the result to ``specimen_mbe.csv.gz`` (gzip)
    under ``output_path``.

    Args:
        data_path: Directory prefix of the source files (must end with a
            path separator, because it is concatenated as a string).
        output_path: Directory prefix for the converted file (same rule).

    Returns:
        The converted DataFrame.
    """
    microbiologyevents = pd.read_csv(data_path+'MICROBIOLOGYEVENTS'+file_ext, compression=compression)
    # ROW_ID of D_ITEMS becomes the index so it does not collide with the
    # ROW_ID column of the events table in the merges.
    d_items = pd.read_csv(data_path+'D_ITEMS'+file_ext, compression=compression, index_col=0)

    item_columns = ['ITEMID', 'LABEL', 'DBSOURCE', 'PARAM_TYPE']

    # NOTE(review): all three merges use pandas' default inner join, so a row
    # whose SPEC_ITEMID, ORG_ITEMID or AB_ITEMID is missing or has no D_ITEMS
    # entry is dropped from the output - confirm this is intended.
    # The specimen lookup keeps the unsuffixed column names and additionally
    # brings in CATEGORY; the organism and antibiotic lookups get the '_org'
    # and '_ab' suffixes.
    specimen_mbe = pd.merge(microbiologyevents, d_items[item_columns + ['CATEGORY']],
                            left_on='SPEC_ITEMID', right_on='ITEMID')
    specimen_mbe = pd.merge(specimen_mbe, d_items[item_columns],
                            left_on='ORG_ITEMID', right_on='ITEMID', suffixes=('', '_org'))
    specimen_mbe = pd.merge(specimen_mbe, d_items[item_columns],
                            left_on='AB_ITEMID', right_on='ITEMID', suffixes=('', '_ab'))

    # Values that do not match the format become NaT instead of raising.
    specimen_mbe['CHARTTIME'] = pd.to_datetime(specimen_mbe['CHARTTIME'],
                                               format='%Y-%m-%d %H:%M:%S',
                                               errors='coerce')

    # String concatenation with NaN yields NaN, so blank out missing
    # PARAM_TYPE values first. The result is assigned back to the frame: an
    # inplace replace on a selected column does not update the frame under
    # pandas copy-on-write, and np.NaN no longer exists in NumPy 2.
    for column in ('PARAM_TYPE', 'PARAM_TYPE_org', 'PARAM_TYPE_ab'):
        specimen_mbe[column] = specimen_mbe[column].fillna('')

    specimen_mbe['note'] = (specimen_mbe['LABEL'] + ' ' + specimen_mbe['DBSOURCE'] + ' ' + specimen_mbe['PARAM_TYPE']
                            + ' ' + specimen_mbe['LABEL_org'] + ' ' + specimen_mbe['DBSOURCE_org'] + ' ' + specimen_mbe['PARAM_TYPE_org']
                            + ' ' + specimen_mbe['LABEL_ab'] + ' ' + specimen_mbe['DBSOURCE_ab'] + ' ' + specimen_mbe['PARAM_TYPE_ab'])

    # Drop CHARTDATE and the lookup columns that were combined into note.
    specimen_mbe = specimen_mbe.drop(columns=[
        'CHARTDATE',
        'ITEMID', 'LABEL', 'PARAM_TYPE', 'DBSOURCE',
        'ITEMID_org', 'LABEL_org', 'PARAM_TYPE_org', 'DBSOURCE_org',
        'ITEMID_ab', 'LABEL_ab', 'PARAM_TYPE_ab', 'DBSOURCE_ab',
    ])

    specimen_mbe = specimen_mbe.rename(columns={
        'ROW_ID': 'identifier',
        'SUBJECT_ID': 'subject',
        'HADM_ID': 'request_encounter_admission',
        'CHARTTIME': 'collection_dateTime',
        'SPEC_ITEMID': 'type_code',
        'SPEC_TYPE_DESC': 'type_name',
        'CATEGORY': 'type_category',
        'ORG_ITEMID': 'method_bact_code',
        'ORG_NAME': 'method_bact_name',
        'ISOLATE_NUM': 'method_colNum',
        'AB_ITEMID': 'method_antibiotic_code',
        'AB_NAME': 'method_antibiotic_name',
        'DILUTION_TEXT': 'method_dilution_description',
        'DILUTION_COMPARISON': 'method_dilution_comp',
        'DILUTION_VALUE': 'method_dilution_value',
        'INTERPRETATION': 'note_interpretation',
    })

    specimen_mbe.to_csv(output_path+'specimen_mbe.csv.gz', compression='gzip', index=False)
    return specimen_mbe

# Run the MICROBIOLOGYEVENTS conversion and preview the first rows of the result.
specimen_mbe = transform_microbiologyevents(data_path, output_path)
specimen_mbe.head()

## fhir.serviceRequest table

#### MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.services.ROW_ID | fhir.serviceRequest.identifier|
|2|mimic.services.SUBJECT_ID | fhir.serviceRequest.subject|
|3|mimic.services.HADM_ID | fhir.serviceRequest.encounter|
|4|mimic.services.TRANSFERTIME | fhir.serviceRequest.occuranceDateTime|
|5|mimic.services.PREV_SERVICE | fhir.serviceRequest.replaces|
|6|mimic.services.CURR_SERVICE | fhir.serviceRequest.code_name|

In [None]:
def transform_services(data_path, output_path):
    """Convert the MIMIC-III SERVICES table into the serviceRequest table.

    The input file name is built as ``data_path + 'SERVICES' + file_ext`` and
    read with the module-level ``compression`` setting. ``TRANSFERTIME`` is
    parsed to datetimes, the MIMIC columns are renamed, and the result is
    written to ``services.csv.gz`` (gzip) under ``output_path``.

    Args:
        data_path: Directory prefix of the source files (must end with a
            path separator, because it is concatenated as a string).
        output_path: Directory prefix for the converted file (same rule).

    Returns:
        The converted DataFrame.
    """
    column_map = {
        'ROW_ID': 'identifier',
        'SUBJECT_ID': 'subject',
        'HADM_ID': 'encounter',
        'TRANSFERTIME': 'occuranceDateTime',
        'PREV_SERVICE': 'replaces',
        'CURR_SERVICE': 'code_name',
    }

    frame = pd.read_csv(data_path + 'SERVICES' + file_ext, compression=compression)

    # Values that do not match the format become NaT instead of raising.
    parsed_times = pd.to_datetime(frame['TRANSFERTIME'],
                                  format='%Y-%m-%d %H:%M:%S',
                                  errors='coerce')
    frame['TRANSFERTIME'] = parsed_times

    service_request = frame.rename(columns=column_map)
    service_request.to_csv(output_path + 'services.csv.gz', compression='gzip', index=False)
    return service_request

# Run the SERVICES conversion and preview the first rows of the result.
serviceRequest = transform_services(data_path, output_path)
serviceRequest.head()