# MIMIC-III tables to FHIR resource mapping in this notebook

||Original format | FHIR resource| Progress|Final Check|
|------|:-----|:-----|:-----|---:---|
|12|caregivers | practitioner| C,L,A|Done|
|13|procedures_icd | procedure| C,L,A|Done|
|14|procedureevents_mv | procedure| C,L,A|Done|
|15|microbiology | specimen| C,L,A|Done|
|16|outputevents | specimen| C,L,A|Done|
|17|service | serviceRequest|C,L,A|Done|
|18|callout | -|-|-|
|19|transfers | -|-|-|
|20|drgcodes | -|-|-|

In [1]:
import numpy as np
import pandas as pd
import os
import gc

In [2]:
data_path = './data/mimic-iii-clinical-database-1.4/'
output_path = './data/fhir_out/'
file_ext = '.csv.gz'
compression = 'gzip'

data_files = os.listdir(data_path)

## fhir.practitioner table

#### MAPPING:

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.caregivers.CGID|fhir.practitioner.identifier|
|2|mimic.caregivers.LABEL|fhir.practitioner.qualification_category|
|3|mimic.caregivers.DESCRIPTION|fhir.practitioner.qualification_label|

In [3]:
def transform_caregivers(data_path, output_path):
    caregivers = pd.read_csv(data_path+'CAREGIVERS'+file_ext, compression=compression,
                             usecols = ['CGID', 'LABEL', 'DESCRIPTION'],
                             dtype = {'CGID': int, 'LABEL': str, 'DESCRIPTION': str})

    caregivers.rename(columns={'CGID':'identifier',
                               'LABEL':'qualification_label',
                               'DESCRIPTION':'qualification_category'}, inplace=True)

    caregivers.to_csv(output_path+'practitioner.csv.gz', compression='gzip', index=False)
    return caregivers

practitioner = transform_caregivers(data_path, output_path)
practitioner.head()

Unnamed: 0,identifier,qualification_label,qualification_category
0,16174,RO,Read Only
1,16175,RO,Read Only
2,16176,Res,Resident/Fellow/PA/NP
3,16177,RO,Read Only
4,16178,RT,Respiratory


## fhir.procedure table

#### PROCEDURES_ICD MAPPING:<br>
||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.procedures_icd.ROW_ID | fhir.procedure.identifier|
|2|mimic.procedures_icd.SUBJECT_ID | fhir.procedure.subject|
|3|mimic.procedures_icd.HADM_ID | fhir.procedure.encounter|
|4|mimic.procedures_icd.ICD9_CODE | fhir.procedure.code_icd9|



In [29]:
def transform_procedures_icd(data_path, output_path):
    """ original function had erroneous result """
    procedures_icd = pd.read_csv(data_path+'PROCEDURES_ICD'+file_ext, compression=compression,
                                 dtype = {'ROW_ID': int, 'SUBJECT_ID': int,
                                          'HADM_ID': int, 'ICD9_CODE': str,
                                          'SEQ_NUM': int})
    
    # will have next procedure ROW_ID if any
    procedures_icd['followUp'] = procedures_icd.sort_values(['HADM_ID', 'SEQ_NUM']).groupby('HADM_ID')['ROW_ID'].shift(-1)
    procedures_icd.drop(columns=['SEQ_NUM'], inplace=True)
    procedures_icd.rename(columns={'ROW_ID':'identifier',
                                   'SUBJECT_ID':'subject',
                                   'HADM_ID':'encounter',
                                   'ICD9_CODE':'code'}, inplace=True)

    procedures_icd.to_csv(output_path+'procedure_icd9.csv.gz', compression='gzip', index=False)
    return procedures_icd

procedure_icd9 = transform_procedures_icd(data_path, output_path)
procedure_icd9.head()

Unnamed: 0,identifier,subject,encounter,code,followUp
0,944,62641,154460,3404,
1,945,2592,130856,9671,946.0
2,946,2592,130856,3893,
3,947,55357,119355,9672,948.0
4,948,55357,119355,331,949.0


#### PROCEDUREEVENTS_MV MAPPING:<br>
||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.procedureevents_mv.ROW_ID | fhir.procedure.identifier|
|2|mimic.procedureevents_mv.SUBJECT_ID | fhir.procedure.subject|
|3|mimic.procedureevents_mv.HADM_ID | fhir.procedure.encounter|
|4|mimic.procedureevents_mv.ICUSTAY_ID| fhir.procedure.partOf|
|5|mimic.procedureevents_mv.STARTTIME| fhir.procedure.performedRange_start|
|6|mimic.procedureevents_mv.ENDTIME| fhir.procedure.performedRange_end|
|7|mimic.procedureevents_mv.ITEMID| fhir.procedure.code|
|8|mimic.procedureevents_mv.VALUE| fhir.procedure.outcome_value|
|9|mimic.procedureevents_mv.VALUEOM | fhir.procedure.outcome_unit|
|10|mimic.procedureevents_mv.LOCATION| fhir.procedure.location_name|
|11|mimic.procedureevents_mv.LOCATIONCATEGORY| fhir.procedure.location_category|
|12|mimic.procedureevents_mv.CGID|fhir.procedure.performer|
|13|mimic.procedureevents_mv.ORDERID| fhir.procedure.basedOn|
|14|mimic.procedureevents_mv.LINKORDERID|fhir.procedure.basedOn_linked|
|15|mimic.procedureevents_mv.ORDERCATEGORYNAME| fhir.procedure.category_order_name|
|16|mimic.procedureevents_mv.SECONDARYORDERCATEGORYNAME|fhir.procedure.category_secOrder_name|
|17|mimic.procedureevents_mv.ORDERCATEGORYDESCRIPTION|fhir.procedure.category_order_description|
|18|mimic.procedureevents_mv.ISOPENBAG| fhir.procedure.usedReference_openBag|
|19|mimic.procedureevents_mv.CONTINUEINEXTDEPT| fhir.procedure.report_contExtDep|
|20|mimic.procedureevents_mv.CANCELREASON| fhir.procedure.report_cancelReason|
|21|mimic.procedureevents_mv.STATUSDESCRIPTION| fhir.procedure.status|
|22|mimic.procedureevents_mv.COMMENTS_EDITEDBY| fhir.procedure.report_editedBy|
|23|mimic.procedureevents_mv.COMMENTS_CANCELEDBY| fhir.procedure.report_canceledBy|
|24|mimic.procedureevents_mv.COMMENTS_DATE| fhir.procedure.report_canceledDate|
|25|mimic.d_items.(LABEL+DBSOURCE+PARAM_TYPE)|mimic.procedure.note|
|26|mimic.d_items.CATEGORY|mimic.procedure.category|

In [32]:
def transform_procedurevents_mv(data_path, output_path):
    procedurevents_mv = pd.read_csv(data_path+'PROCEDUREEVENTS_MV'+file_ext, compression=compression,
                                    # 'STORETIME'
                                    usecols = ['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID',
                                               'STARTTIME', 'ENDTIME', 'ITEMID', 'VALUE', 'VALUEUOM',
                                               'LOCATION', 'LOCATIONCATEGORY',
                                               'CGID', 'ORDERID', 'LINKORDERID', 'ORDERCATEGORYNAME',
                                               'SECONDARYORDERCATEGORYNAME', 'ORDERCATEGORYDESCRIPTION',
                                               'ISOPENBAG', 'CONTINUEINNEXTDEPT', 'CANCELREASON',
                                               'STATUSDESCRIPTION', 'COMMENTS_EDITEDBY',
                                               'COMMENTS_CANCELEDBY', 'COMMENTS_DATE'],
                                    dtype ={'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': int,
                                            'ICUSTAY_ID': float, 'ITEMID': int, 'VALUE': np.float64,
                                            'VALUEUOM': str, 'LOCATION': str, 'LOCATIONCATEGORY': str,
                                            'CGID': int, 'ORDERID': int, 'LINKORDERID': int,
                                            'ORDERCATEGORYNAME': str, 'SECONDARYORDERCATEGORYNAME': str,
                                            'ORDERCATEGORYDESCRIPTION': str, 'ISOPENBAG': int,
                                            'CONTINUEINNEXTDEPT': int, 'CANCELREASON': int,
                                            'STATUSDESCRIPTION': str, 'COMMENTS_EDITEDBY': str,
                                            'COMMENTS_CANCELEDBY': str},
                                    parse_dates=['ENDTIME', 'STARTTIME', 'COMMENTS_DATE'],
                                   )
    
    d_items = pd.read_csv(data_path+'D_ITEMS'+file_ext, compression=compression, index_col=0,
                      # dropped 'ABBREVIATION', 'LINKSTO', 'CONCEPTID', 'UNITNAME'
                      usecols=['ROW_ID', 'ITEMID', 'LABEL', 'DBSOURCE', 'CATEGORY', 'PARAM_TYPE'],
                      dtype={'ROW_ID': int, 'ITEMID': int, 'LABEL': str, 'DBSOURCE': 'category',
                             'CATEGORY': 'category', 'PARAM_TYPE': str})
    
    procedurevents_mv = pd.merge(procedurevents_mv, d_items, on='ITEMID')
    
    procedurevents_mv['note'] = procedurevents_mv['LABEL'].str.cat(procedurevents_mv['DBSOURCE'], sep=' ', na_rep='NA')
    procedurevents_mv['note'] = procedurevents_mv['note'].str.cat(procedurevents_mv['PARAM_TYPE'], sep=' ', na_rep='')
    
    procedurevents_mv.drop(columns=['LABEL', 'PARAM_TYPE', 'DBSOURCE'], inplace=True)

    procedurevents_mv.rename(columns={'ROW_ID':'identifier',
                                      'SUBJECT_ID':'subject',
                                      'HADM_ID':'encounter',
                                      'ICUSTAY_ID':'partOf',
                                      'STARTTIME':'performedRange_start',
                                      'ENDTIME':'performedRange_end',
                                      'ITEMID':'code',
                                      'VALUE':'outcome_value',
                                      'VALUEUOM':'outcome_unit',
                                      'LOCATION':'location_name',
                                      'LOCATIONCATEGORY':'location_category',
                                      'CGID':'performer',
                                      'ORDERID':'basedOn',
                                      'LINKORDERID':'basedOn_linked',
                                      'ORDERCATEGORYNAME':'category_order_name',
                                      'SECONDARYORDERCATEGORYNAME':'category_secOrder_name',
                                      'ORDERCATEGORYDESCRIPTION':'category_order_description',
                                      'ISOPENBAG':'usedReference_openBag',
                                      'CONTINUEINNEXTDEPT':'report_contNextDep',
                                      'CANCELREASON':'report_cancelReason',
                                      'STATUSDESCRIPTION':'status',
                                      'COMMENTS_EDITEDBY':'report_editedBy',
                                      'COMMENTS_CANCELEDBY':'report_canceledBy',
                                      'COMMENTS_DATE':'report_canceledDate',
                                      'CATEGORY':'category'
                                     }, inplace=True)

    procedurevents_mv.to_csv(output_path+'procedure_mv.csv.gz', compression='gzip', index=False)
    return procedurevents_mv

procedure_mv = transform_procedurevents_mv(data_path, output_path)
procedure_mv.head()

Unnamed: 0,identifier,subject,encounter,partOf,performedRange_start,performedRange_end,code,outcome_value,outcome_unit,location_name,...,category_order_description,usedReference_openBag,report_contNextDep,report_cancelReason,status,report_editedBy,report_canceledBy,report_canceledDate,category,note
0,379,29070,115071,232563.0,2145-03-12 23:04:00,2145-03-12 23:05:00,225401,1.0,,,...,Electrolytes,0,0,0,FinishedRunning,,,NaT,6-Cultures,Blood Cultured metavision Process
1,390,29070,115071,232563.0,2145-03-15 12:17:00,2145-03-15 12:18:00,225401,1.0,,,...,Electrolytes,0,0,0,FinishedRunning,,,NaT,6-Cultures,Blood Cultured metavision Process
2,393,29070,115071,232563.0,2145-03-16 12:02:00,2145-03-16 12:03:00,225401,1.0,,,...,Electrolytes,0,0,0,FinishedRunning,,,NaT,6-Cultures,Blood Cultured metavision Process
3,396,29070,115071,232563.0,2145-03-17 00:51:00,2145-03-17 00:52:00,225401,1.0,,,...,Electrolytes,0,0,1,Rewritten,,RN,2145-03-17 00:52:00,6-Cultures,Blood Cultured metavision Process
4,397,29070,115071,232563.0,2145-03-17 00:52:00,2145-03-17 00:53:00,225401,1.0,,,...,Electrolytes,0,0,0,FinishedRunning,,,NaT,6-Cultures,Blood Cultured metavision Process


## fhir.specimen table

#### OUTPUTEVENTS MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.outputevents:ROW_ID | fhir.specimen.identifier|
|2|mimic.outputevents.SUBJECT_ID | fhir.specimen.subject|
|3|mimic.outputevents.HADM_ID | fhir.specimen.request_encounter_admission|
|4|mimic.outputevents.ICUSTAY_ID | fhir.specimen.request_encounter_icustay|
|5|mimic.outputevents.ITEMID | fhir.specimen.type_code|
|6|mimic.d_items.CATEGORY | fhir.specimen.type_category|
|7|mimic.outputevents.CGID | fhir.specimen.collection_collector|
|8|mimic.outputevents.CHARTTIME | fhir.specimen.collection_dateTime|
|9|mimic.outputevents.VALUE | fhir.specimen.collection_quantity|
|10|mimic.outputevents.VALUEUOM | fhir.specimen.collection_unit|
|11|mimic.outputevents.NEWBOTTLE | fhir.specimen.collection_newBottle|
|12|mimic.outputevents.(STOPPED+ISERROR) | fhir.specimen.status|
|13|mimic.d_items.(LABEL+DBSOURCE+PARAM_TYPE) | fhir.specimen.note|



In [37]:
def transform_outputevents(data_path, output_path):
    # 'STORETIME'
    outputevents = pd.read_csv(data_path+'OUTPUTEVENTS'+file_ext, compression=compression,
                               usecols = ['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID',
                                          'CHARTTIME', 'ITEMID', 'VALUE', 'VALUEUOM',
                                          'CGID', 'STOPPED', 'NEWBOTTLE', 'ISERROR'],
                               dtype = {'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': float,
                                        'ICUSTAY_ID': float, 'ITEMID': int, 'VALUE': np.float64,
                                        'VALUEUOM': str, 'CGID': int, 'STOPPED': str,
                                        'NEWBOTTLE': float, 'ISERROR': float},
                               parse_dates=['CHARTTIME'],
                               )
    
    d_items = pd.read_csv(data_path+'D_ITEMS'+file_ext, compression=compression, index_col=0,
                      # dropped 'ABBREVIATION', 'LINKSTO', 'CONCEPTID', 'UNITNAME'
                      usecols=['ROW_ID', 'ITEMID', 'LABEL', 'DBSOURCE', 'CATEGORY', 'PARAM_TYPE'],
                      dtype={'ROW_ID': int, 'ITEMID': int, 'LABEL': str, 'DBSOURCE': 'category',
                             'CATEGORY': 'category', 'PARAM_TYPE': str})
    
    specimen_oe = pd.merge(outputevents, d_items, on='ITEMID')

    # Replace NaN in columns with empty strings so that concatenation in notes works
    specimen_oe['note'] = specimen_oe['LABEL'].str.cat(specimen_oe['DBSOURCE'], sep=' ', na_rep='NA')
    specimen_oe['note'] = specimen_oe['note'].str.cat(specimen_oe['PARAM_TYPE'], sep=' ', na_rep='')
    
    # Combine STOPPED and ISERROR column, Errorneous notes entries will be eliminated later on
    specimen_oe.loc[specimen_oe.ISERROR==1,'STOPPED'] = 'Error'

    # Drop Columns not needed anymore
    specimen_oe.drop(columns=['LABEL', 'PARAM_TYPE', 'DBSOURCE', 'ISERROR'], inplace=True)

    specimen_oe.rename(columns={'ROW_ID':'identifier',
                                'SUBJECT_ID':'subject',
                                'HADM_ID':'request_encounter_admission',
                                'ICUSTAY_ID':'request_encounter_icustay',
                                'ITEMID':'type_code',
                                'CATEGORY':'type_category',
                                'CGID':'collector',
                                'CHARTTIME':'collected_dateTime',
                                'VALUE':'collection_quantity',
                                'VALUEUOM':'collection_unit',
                                'NEWBOTTLE':'collection_newBottle',
                                'STOPPED':'status'}, inplace=True)
    
    specimen_oe = specimen_oe.reindex(columns=['identifier',
                                                'subject',
                                                'request_encounter_admission',
                                                'request_encounter_icustay',
                                                'type_code',
                                                'type_category',
                                                'collection_collector',
                                                'collection_dateTime',
                                                'collection_quantity',
                                                'collection_unit',
                                                'collection_newBottle',
                                                'status',
                                                'note'], copy=False)

    specimen_oe.to_csv(output_path+'specimen_oe.csv.gz', compression='gzip', index=False)
    return specimen_oe

specimen_oe = transform_outputevents(data_path, output_path)
specimen_oe.head()

Unnamed: 0,identifier,subject,request_encounter_admission,request_encounter_icustay,type_code,type_category,collection_collector,collection_dateTime,collection_quantity,collection_unit,collection_newBottle,status,note
0,344,21219,177991.0,225765.0,40055,,,,200.0,ml,,,Urine Out Foley carevue
1,345,21219,177991.0,225765.0,40055,,,,200.0,ml,,,Urine Out Foley carevue
2,346,21219,177991.0,225765.0,40055,,,,120.0,ml,,,Urine Out Foley carevue
3,347,21219,177991.0,225765.0,40055,,,,100.0,ml,,,Urine Out Foley carevue
4,348,21219,177991.0,225765.0,40055,,,,200.0,ml,,,Urine Out Foley carevue


#### MICROBIOLOGYEVENTS MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.microbiologyevents.ROW_ID| fhir.specimen.identifier|
|2|mimic.microbiologyevents.SUBJECT_ID | fhir.specimen.subject|
|3|mimic.microbiologyevents.HADM_ID | fhir.specimen.request_encounter_admission|
|4|mimic.microbiologyevents.CHARTTIME | fhir.specimen.collection_dateTime|
|5|mimic.microbiologyevents.SPEC_ITEMID | fhir.specimen.type_code|
|6|mimic.microbiologyevents.SPEC_TYPE_DESC | fhir.specimen.type_name|
|7|mimic.d_items(on SPEC).CATEGORY | fhir.specimen.type_category|
|8|mimic.microbiologyevents.ORG_ITEMID | fhir.specimen.method_bact_code|
|9|mimic.microbiologyevents.ORG_NAME | fhir.specimen.method_bact_name|
|10|mimic.microbiologyevents.ISOLATE_NUM | fhir.specimen.method_colNum|
|11|mimic.microbiologyevents.AB_ITEMID | fhir.specimen.method_antibiotic_code|
|12|mimic.microbiologyevents.AB_NAME | fhir.specimen.method_antibiotic_name|
|13|mimic.microbiologyevents.DILUTION_TEXT | fhir.specimen.method_dilution_description|
|14|mimic.microbiologyevents.DILUTION_COMPARISON | fhir.specimen.method_dilution_comp|
|15|mimic.microbiologyevents.DILUTION_VALUE | fhir.specimen.method_dilution_value|
|16|mimic.microbiologyevents.INTERPRETATION | fhir.specimen.note_interpretation|
|17|mimic.d_items_(SPEC/ORG/AB).(LABEL+PARAM_TYPE+DBSOURCE) | fhir.specimen.note|

In [44]:
def transform_microbiologyevents(data_path, output_path):
    # 'CHARTDATE', 
    microbiologyevents = pd.read_csv(data_path+'MICROBIOLOGYEVENTS'+file_ext, compression=compression,
                                     usecols = ['ROW_ID', 'SUBJECT_ID', 'HADM_ID',
                                                'CHARTTIME', 'SPEC_ITEMID', 'SPEC_TYPE_DESC',
                                                'ORG_ITEMID', 'ORG_NAME', 'ISOLATE_NUM', 'AB_ITEMID',
                                                'AB_NAME', 'DILUTION_TEXT', 'DILUTION_COMPARISON',
                                                'DILUTION_VALUE', 'INTERPRETATION'],
                                     dtype = {'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': int,
                                              'SPEC_ITEMID': float, 'SPEC_TYPE_DESC': str,
                                              'ORG_ITEMID': float, 'ORG_NAME': str, 'ISOLATE_NUM': float,
                                              'AB_ITEMID': float, 'AB_NAME': str, 'DILUTION_TEXT': str,
                                              'DILUTION_COMPARISON': str, 'DILUTION_VALUE': np.float64,
                                              'INTERPRETATION': str},
                                     parse_dates=['CHARTTIME'],
                                    )
    
    d_items = pd.read_csv(data_path+'D_ITEMS'+file_ext, compression=compression, index_col=0,
                          # dropped 'ABBREVIATION', 'LINKSTO', 'CONCEPTID', 'UNITNAME'
                          usecols=['ROW_ID', 'ITEMID', 'LABEL', 'DBSOURCE', 'CATEGORY', 'PARAM_TYPE'],
                          dtype={'ROW_ID': int, 'ITEMID': int, 'LABEL': str, 'DBSOURCE': 'category',
                                 'CATEGORY': 'category', 'PARAM_TYPE': str})
    
    # ???
    specimen_mbe = pd.merge(microbiologyevents, d_items[['ITEMID','LABEL','DBSOURCE','PARAM_TYPE','CATEGORY']], left_on='SPEC_ITEMID', right_on='ITEMID')
    specimen_mbe = pd.merge(specimen_mbe, d_items[['ITEMID','LABEL','DBSOURCE','PARAM_TYPE']], left_on='ORG_ITEMID', right_on='ITEMID', suffixes=('','_org'))
    specimen_mbe = pd.merge(specimen_mbe, d_items[['ITEMID','LABEL','DBSOURCE','PARAM_TYPE']], left_on='AB_ITEMID', right_on='ITEMID', suffixes=('','_ab'))

    # Replace NaN in columns with empty strings so that concatenation in notes works
    specimen_mbe['note'] = specimen_mbe['LABEL'].str.cat(specimen_mbe['DBSOURCE'], sep=' ', na_rep='NA')
    specimen_mbe['note'] = specimen_mbe['note'].str.cat(specimen_mbe['PARAM_TYPE'], sep=' ', na_rep='')
    specimen_mbe['note'] = specimen_mbe['note'].str.cat(specimen_mbe['LABEL_org'], sep=' ', na_rep='')
    specimen_mbe['note'] = specimen_mbe['note'].str.cat(specimen_mbe['DBSOURCE_org'], sep=' ', na_rep='')
    specimen_mbe['note'] = specimen_mbe['note'].str.cat(specimen_mbe['PARAM_TYPE_org'], sep=' ', na_rep='')
    specimen_mbe['note'] = specimen_mbe['note'].str.cat(specimen_mbe['LABEL_ab'], sep=' ', na_rep='')
    specimen_mbe['note'] = specimen_mbe['note'].str.cat(specimen_mbe['DBSOURCE_ab'], sep=' ', na_rep='')
    specimen_mbe['note'] = specimen_mbe['note'].str.cat(specimen_mbe['PARAM_TYPE_ab'], sep=' ', na_rep='')

    # Drop columns combined to note field
    specimen_mbe.drop(columns=['ITEMID', 'LABEL', 'PARAM_TYPE','DBSOURCE',
                               'ITEMID_org', 'LABEL_org', 'PARAM_TYPE_org','DBSOURCE_org',
                               'ITEMID_ab', 'LABEL_ab', 'PARAM_TYPE_ab','DBSOURCE_ab'
                              ], inplace=True)

    specimen_mbe.rename(columns={'ROW_ID':'identifier',
                                 'SUBJECT_ID':'subject',
                                 'HADM_ID':'request_encounter_admission',
                                 'CHARTTIME':'collection_dateTime',
                                 'SPEC_ITEMID':'type_code',
                                 'SPEC_TYPE_DESC':'type_name',
                                 'CATEGORY':'type_category',
                                 'ORG_ITEMID':'method_bact_code',
                                 'ORG_NAME':'method_bact_name',
                                 'ISOLATE_NUM':'method_colNum',
                                 'AB_ITEMID':'method_antibiotic_code',
                                 'AB_NAME':'method_antibiotic_name',
                                 'DILUTION_TEXT':'method_dilution_description',
                                 'DILUTION_COMPARISON':'method_dilution_comp',
                                 'DILUTION_VALUE':'method_dilution_value',
                                 'INTERPRETATION':'note_interpretation'}, inplace=True)

    specimen_mbe.to_csv(output_path+'specimen_mbe.csv.gz', compression='gzip', index=False)
    return specimen_mbe

specimen_mbe = transform_microbiologyevents(data_path, output_path)
specimen_mbe.head()

Unnamed: 0,identifier,subject,request_encounter_admission,collection_dateTime,type_code,type_name,method_bact_code,method_bact_name,method_colNum,method_antibiotic_code,method_antibiotic_name,method_dilution_description,method_dilution_comp,method_dilution_value,note_interpretation,type_category,note
0,706,96,170324,2156-04-08 15:48:00,70021.0,BRONCHOALVEOLAR LAVAGE,80026.0,PSEUDOMONAS AERUGINOSA,1.0,90021.0,PIPERACILLIN,<=4,<=,4.0,S,SPECIMEN,BRONCHOALVEOLAR LAVAGE hospital PSEUDOMONAS A...
1,5400,419,148312,2113-06-20 17:35:00,70021.0,BRONCHOALVEOLAR LAVAGE,80026.0,PSEUDOMONAS AERUGINOSA,1.0,90021.0,PIPERACILLIN,=>128,=>,128.0,R,SPECIMEN,BRONCHOALVEOLAR LAVAGE hospital PSEUDOMONAS A...
2,29349,2322,169179,2176-03-15 15:00:00,70021.0,BRONCHOALVEOLAR LAVAGE,80026.0,PSEUDOMONAS AERUGINOSA,1.0,90021.0,PIPERACILLIN,<=4,<=,4.0,S,SPECIMEN,BRONCHOALVEOLAR LAVAGE hospital PSEUDOMONAS A...
3,39371,3064,172794,2186-06-01 14:15:00,70021.0,BRONCHOALVEOLAR LAVAGE,80026.0,PSEUDOMONAS AERUGINOSA,1.0,90021.0,PIPERACILLIN,16,=,16.0,S,SPECIMEN,BRONCHOALVEOLAR LAVAGE hospital PSEUDOMONAS A...
4,32461,2514,147513,2131-06-20 17:52:00,70021.0,BRONCHOALVEOLAR LAVAGE,80026.0,PSEUDOMONAS AERUGINOSA,1.0,90021.0,PIPERACILLIN,<=4,<=,4.0,S,SPECIMEN,BRONCHOALVEOLAR LAVAGE hospital PSEUDOMONAS A...


## fhir.serviceRequest table

#### MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.services.ROW_ID | fhir.serviceRequest.identifier|
|2|mimic.services.SUBJECT_ID | fhir.serviceRequest.subject|
|3|mimic.services.HADM_ID | fhir.serviceRequest.encounter|
|4|mimic.services.TRANSFERTIME | fhir.serviceRequest.occuranceDateTime|
|5|mimic.services.PREV_SERVICE | fhir.serviceRequest.replaces|
|6|mimic.services.CURR_SERVICE | fhir.serviceRequest.code_name|

In [45]:
def transform_services(data_path, output_path):
    services = pd.read_csv(data_path+'SERVICES'+file_ext, compression=compression,
                           dtype = {'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': int,
                                    'PREV_SERVICE': str, 'CURR_SERVICE': str},
                           parse_dates=['TRANSFERTIME'],
                           )
    
    services.rename(columns={'ROW_ID':'identifier',
                             'SUBJECT_ID':'subject',
                             'HADM_ID':'encounter',
                             'TRANSFERTIME':'occuranceDateTime',
                             'PREV_SERVICE':'replaces',
                             'CURR_SERVICE':'code_name'}, inplace=True)
    
    services.to_csv(output_path+'services.csv.gz', compression='gzip', index=False)
    return services

serviceRequest = transform_services(data_path, output_path)
serviceRequest.head()

Unnamed: 0,identifier,subject,encounter,occuranceDateTime,replaces,code_name
0,758,471,135879,2122-07-22 14:07:27,TSURG,MED
1,759,471,135879,2122-07-26 18:31:49,MED,TSURG
2,760,472,173064,2172-09-28 19:22:15,,CMED
3,761,473,129194,2201-01-09 20:16:45,,NB
4,762,474,194246,2181-03-23 08:24:41,,NB
