# MIMIC-III tables to FHIR resource mapping in this notebook

||Original format | FHIR resource| Progress|Final Check|
|------|:-----|:-----|:-----|:---:|
|8|prescriptions | medicationRequest| C,L,A|Done|
|9|chartevents | observation| C,L,A|Done|
|10|datetimeevents | observation| C,L,A|Done|
|11|labevents | observation| C,L,A|Done|


In [1]:
import numpy as np
import pandas as pd
import os
import gc

In [2]:
# Locations of the raw MIMIC-III dump and of the generated FHIR-style tables.
# Both are used as string prefixes, so they must end with a path separator.
data_path = './data/mimic-iii-clinical-database-1.4/'
output_path = './data/fhir_out/'
# Every source table is read as '<TABLE>' + file_ext with this compression.
file_ext = '.csv.gz'
compression = 'gzip'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before any export cell runs.
os.makedirs(output_path, exist_ok=True)

# Listing the input directory also fails early if data_path does not exist.
data_files = os.listdir(data_path)

## fhir.medicationRequest table

#### MAPPING: <br>
||Original format | FHIR resource format|
|------|:-----|:-----|
|1|mimic.prescriptions.ROW_ID | fhir.medicationRequest.identifier|
|2|mimic.prescriptions.SUBJECT_ID | fhir.medicationRequest.subject|
|3|mimic.prescriptions.HADM_ID| fhir.medicationRequest.encounter|
|4|mimic.prescriptions.ICUSTAY_ID | fhir.medicationRequest.partOf|
|5|mimic.prescriptions.STARTDATE | fhir.medicationRequest.dispenseRequest_start|
|6|mimic.prescriptions.ENDDATE | fhir.medicationRequest.dispenseRequest_end|
|7|mimic.prescriptions.DRUG_TYPE | fhir.medicationRequest.category|
|8|mimic.prescriptions.DRUG | fhir.medicationRequest.medication_name|
|9|mimic.prescriptions.DRUG_NAME_GENERIC | fhir.medicationRequest.medication_genericName|
|10|mimic.prescriptions.FORMULARY_DRUG_CD | fhir.medicationRequest.medication_code_CD|
|11|mimic.prescriptions.GSN | fhir.medicationRequest.medication_code_GSN|
|12|mimic.prescriptions.NDC | fhir.medicationRequest.medication_code_NDC|
|13|mimic.prescriptions.DOSE_VAL_RX | fhir.medicationRequest.dosageInstruction_value|
|14|mimic.prescriptions.DOSE_UNIT_RX | fhir.medicationRequest.dosageInstruction_unit|
|15|mimic.prescriptions.FORM_VAL_DISP | fhir.medicationRequest.dispenseRequest_value|
|16|mimic.prescriptions.FORM_UNIT_DISP | fhir.medicationRequest.dispenseRequest_unit|
|17|mimic.prescriptions.ROUTE | fhir.medicationRequest.courseOfTherapyType|

In [4]:
def transform_prescriptions(data_path, output_path):
    """Load PRESCRIPTIONS and return it with medicationRequest column names.

    Parameters
    ----------
    data_path : str
        Directory prefix (including the trailing separator) of the source
        tables; the file read is ``data_path + 'PRESCRIPTIONS' + file_ext``.
    output_path : str
        Not used by this function; the caller writes the returned frame.

    Returns
    -------
    pandas.DataFrame
        Every source column except DRUG_NAME_POE and PROD_STRENGTH, renamed
        to the medicationRequest field names.

    Notes
    -----
    Reads the notebook-level ``file_ext`` and ``compression`` settings.
    """
    skipped_columns = ('DRUG_NAME_POE', 'PROD_STRENGTH')

    # Identifiers are numeric (ICUSTAY_ID is float so it can hold missing
    # values); everything else is kept as text so drug codes are not
    # reinterpreted as numbers.
    column_types = {'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': int, 'ICUSTAY_ID': float}
    for text_column in ('DRUG_TYPE', 'DRUG', 'DRUG_NAME_GENERIC',
                        'FORMULARY_DRUG_CD', 'GSN', 'NDC',
                        'DOSE_VAL_RX', 'DOSE_UNIT_RX', 'FORM_VAL_DISP',
                        'FORM_UNIT_DISP', 'ROUTE'):
        column_types[text_column] = str

    fhir_names = {
        'ROW_ID': 'identifier',
        'SUBJECT_ID': 'subject',
        'HADM_ID': 'encounter',
        'ICUSTAY_ID': 'partOf',
        'STARTDATE': 'dispenseRequest_start',
        'ENDDATE': 'dispenseRequest_end',
        'DRUG_TYPE': 'category',
        'DRUG': 'medication_name',
        'DRUG_NAME_GENERIC': 'medication_genericName',
        'FORMULARY_DRUG_CD': 'medication_code_CD',
        'GSN': 'medication_code_GSN',
        'NDC': 'medication_code_NDC',
        'DOSE_VAL_RX': 'dosageInstruction_value',
        'DOSE_UNIT_RX': 'dosageInstruction_unit',
        'FORM_VAL_DISP': 'dispenseRequest_value',
        'FORM_UNIT_DISP': 'dispenseRequest_unit',
        'ROUTE': 'courseOfTherapyType',
    }

    source_file = data_path + 'PRESCRIPTIONS' + file_ext
    raw_prescriptions = pd.read_csv(
        source_file,
        compression=compression,
        usecols=lambda column: column not in skipped_columns,
        dtype=column_types,
        parse_dates=['STARTDATE', 'ENDDATE'],
    )
    return raw_prescriptions.rename(columns=fhir_names)

# Convert PRESCRIPTIONS and preview the first rows of the result.
medicationRequest = transform_prescriptions(data_path=data_path, output_path=output_path)
medicationRequest.head()

Unnamed: 0,identifier,subject,encounter,partOf,dispenseRequest_start,dispenseRequest_end,category,medication_name,medication_genericName,medication_code_CD,medication_code_GSN,medication_code_NDC,dosageInstruction_value,dosageInstruction_unit,dispenseRequest_value,dispenseRequest_unit,courseOfTherapyType
0,2214776,6,107064,,2175-06-11,2175-06-12,MAIN,Tacrolimus,Tacrolimus,TACR1,21796.0,469061711,2,mg,2,CAP,PO
1,2214775,6,107064,,2175-06-11,2175-06-12,MAIN,Warfarin,Warfarin,WARF5,6562.0,56017275,5,mg,1,TAB,PO
2,2215524,6,107064,,2175-06-11,2175-06-12,MAIN,Heparin Sodium,,HEPAPREMIX,6522.0,338055002,25000,UNIT,1,BAG,IV
3,2216265,6,107064,,2175-06-11,2175-06-12,BASE,D5W,,HEPBASE,,0,250,ml,250,ml,IV
4,2214773,6,107064,,2175-06-11,2175-06-12,MAIN,Furosemide,Furosemide,FURO20,8208.0,54829725,20,mg,1,TAB,PO


In [5]:
# Write the converted table as gzip; level 1 is the fastest gzip setting.
medicationRequest.to_csv(
    output_path + 'medicationRequest.csv.gz',
    index=False,
    compression={'method': 'gzip', 'compresslevel': 1},
)

In [7]:
# Drop the exported frame and force a collection so its memory is released
# before the following cells run.
del medicationRequest
gc.collect()

17

# fhir.observation table

## CHARTEVENTS MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
||mimic.chartevents.ROW_ID| fhir.observation.identifier|
||mimic.chartevents.SUBJECT_ID | fhir.observation.subject|
||mimic.chartevents.HADM_ID | fhir.observation.encounter|
||mimic.chartevents.ICUSTAY_ID | fhir.observation.partOf|
||mimic.chartevents.ITEMID | fhir.observation.code|
||mimic.chartevents.CHARTTIME | fhir.observation.effectiveDateTime|
||mimic.chartevents.CGID | fhir.observation.performer|
||mimic.chartevents.VALUE | fhir.observation.value|
||mimic.chartevents.VALUENUM | fhir.observation.value_quantity|
||mimic.chartevents.VALUEUOM | fhir.observation.unit|
||mimic.chartevents.WARNING | fhir.observation.interpretation|
||mimic.chartevents.RESULTSTATUS | fhir.observation.status|
||mimic.d_items.(LABEL+DBSOURCE+PARAM_TYPE) | fhir.observation.note|
||mimic.d_items.CATEGORY | fhir.observation.category_sub|
||'chartevents' | fhir.observation.category|

In [3]:
from pathlib import Path
import time
import gc


def transform_chartevents(data_path, output_path, chunksize=10**7):
    """Convert CHARTEVENTS to the observation layout, one chunk at a time.

    Each chunk is inner-joined with D_ITEMS on ITEMID, reshaped to the
    observation columns and appended to ``observation_ce.csv.gz`` under
    ``output_path``. The function returns nothing; the gzip file is its
    only result.

    Parameters
    ----------
    data_path : str
        Directory prefix (including the trailing separator) of the source
        tables.
    output_path : str
        Directory prefix (including the trailing separator) for the output.
    chunksize : int, default 10**7
        Number of CHARTEVENTS rows processed per chunk. The original author
        reports ~6GB peak RAM with the default.

    Notes
    -----
    Reads the notebook-level ``file_ext`` and ``compression`` settings.
    Because the join is an inner join, rows whose ITEMID is absent from
    D_ITEMS are not written.
    """
    output_filename = output_path + 'observation_ce.csv.gz'
    # Chunks are appended below, so a file left over from an earlier run
    # has to be removed first.
    Path(output_filename).unlink(missing_ok=True)

    d_items = pd.read_csv(data_path + 'D_ITEMS' + file_ext, compression=compression, index_col=0,
                          # dropped 'ABBREVIATION', 'LINKSTO', 'CONCEPTID', 'UNITNAME'
                          usecols=['ROW_ID', 'ITEMID', 'LABEL', 'DBSOURCE', 'CATEGORY', 'PARAM_TYPE'],
                          dtype={'ROW_ID': int, 'ITEMID': int, 'LABEL': str, 'DBSOURCE': 'category',
                                 'CATEGORY': 'category', 'PARAM_TYPE': str})

    # Biggest file: ~4GB gzipped, ~33GB unpacked, ~330M rows (author's figures).
    # Author's note: CareVue and Metavision data should probably be processed separately.
    # STORETIME is intentionally not loaded: no output column is derived from it.
    chunk_container = pd.read_csv(data_path + 'CHARTEVENTS' + file_ext, compression=compression,
                                  usecols=['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'ITEMID',
                                           'CHARTTIME', 'CGID', 'VALUE', 'VALUENUM', 'VALUEUOM',
                                           'WARNING', 'ERROR', 'RESULTSTATUS', 'STOPPED'],
                                  dtype={'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': int,
                                         'ICUSTAY_ID': float, 'ITEMID': int, 'CGID': float,
                                         'VALUE': str, 'VALUENUM': float, 'VALUEUOM': str,
                                         'WARNING': float, 'ERROR': float,
                                         'RESULTSTATUS': str, 'STOPPED': str},
                                  parse_dates=['CHARTTIME'],
                                  chunksize=chunksize)  # 2.67GB for 10**7 (author's figure)

    fhir_names = {'ROW_ID': 'identifier',
                  'SUBJECT_ID': 'subject',
                  'HADM_ID': 'encounter',
                  'ICUSTAY_ID': 'partOf',
                  'ITEMID': 'code',
                  'CGID': 'performer',
                  'CHARTTIME': 'effectiveDateTime',
                  'VALUE': 'value',
                  'VALUENUM': 'value_quantity',
                  'VALUEUOM': 'unit',
                  'WARNING': 'interpretation',
                  'RESULTSTATUS': 'status',
                  'CATEGORY': 'category_sub'}
    observation_columns = ['identifier', 'subject', 'encounter', 'partOf', 'code',
                           'effectiveDateTime', 'performer', 'value', 'value_quantity',
                           'unit', 'interpretation', 'status', 'note', 'category_sub',
                           'category']

    for i, chartevents in enumerate(chunk_container):
        # Show progress (~330M rows in total)
        print(f'{i + 1}/{330*10**6 / chunksize}', flush=True, end =" ")
        start_time = time.time()

        observation_ce = pd.merge(chartevents, d_items, on='ITEMID')

        # note = "<LABEL> <DBSOURCE> <PARAM_TYPE>"; a missing LABEL/DBSOURCE
        # becomes 'NA', a missing PARAM_TYPE becomes an empty string.
        label_and_source = observation_ce['LABEL'].str.cat(observation_ce['DBSOURCE'],
                                                           sep=' ', na_rep='NA')
        observation_ce['note'] = label_and_source.str.cat(observation_ce['PARAM_TYPE'],
                                                          sep=' ', na_rep='')

        # STOPPED and ERROR are folded into the status column; ERROR is
        # applied last, so it wins when both conditions hold.
        observation_ce.loc[observation_ce['STOPPED'] == "D/C'd", 'RESULTSTATUS'] = 'discharged'
        observation_ce.loc[observation_ce['ERROR'] == 1, 'RESULTSTATUS'] = 'Error'
        # Constant marker identifying the source table of these observations
        observation_ce['category'] = 'chartevents'

        # reindex both orders the output columns and discards the helper
        # columns (LABEL, PARAM_TYPE, ERROR, DBSOURCE, STOPPED).
        observation_ce = (observation_ce
                          .rename(columns=fhir_names)
                          .reindex(columns=observation_columns))

        # Only the first chunk writes the header row; otherwise every
        # appended chunk would repeat it in the middle of the data.
        observation_ce.to_csv(output_filename, compression={'method': 'gzip', 'compresslevel': 1},
                              index=False, mode='a', header=(i == 0))
        # force free mem, for some reasons without it, RAM ends pretty quick
        gc.collect()
        # show execution time per chunk
        print(f"--- {time.time() - start_time} seconds ---", flush=True)

        
# The result is streamed to observation_ce.csv.gz; the function returns
# nothing, so there is no frame to preview here.
transform_chartevents(data_path=data_path, output_path=output_path)

1/33.0 --- 99.12988948822021 seconds ---
2/33.0 --- 102.19948816299438 seconds ---
3/33.0 --- 102.49104332923889 seconds ---
4/33.0 --- 98.38828468322754 seconds ---
5/33.0 --- 99.37262392044067 seconds ---
6/33.0 --- 98.72399520874023 seconds ---
7/33.0 --- 98.82018399238586 seconds ---
8/33.0 --- 97.12989234924316 seconds ---
9/33.0 --- 98.4189944267273 seconds ---
10/33.0 --- 99.18599510192871 seconds ---
11/33.0 --- 98.32328128814697 seconds ---
12/33.0 --- 97.49378180503845 seconds ---
13/33.0 --- 98.69335556030273 seconds ---
14/33.0 --- 99.97787261009216 seconds ---
15/33.0 --- 101.6583559513092 seconds ---
16/33.0 --- 98.44779443740845 seconds ---
17/33.0 --- 98.28255653381348 seconds ---
18/33.0 --- 103.05923461914062 seconds ---
19/33.0 --- 101.67482829093933 seconds ---
20/33.0 --- 93.02813959121704 seconds ---
21/33.0 --- 91.26527094841003 seconds ---
22/33.0 --- 93.36439061164856 seconds ---
23/33.0 --- 91.3999195098877 seconds ---
24/33.0 --- 91.02369546890259 seconds ---

## DATETIMEEVENTS MAPPING:<br>

||Original format | FHIR resource format|
|------|:-----|:-----|
||mimic.datetimeevents.ROW_ID| fhir.observation.identifier|
||mimic.datetimeevents.SUBJECT_ID | fhir.observation.subject|
||mimic.datetimeevents.HADM_ID | fhir.observation.encounter|
||mimic.datetimeevents.ICUSTAY_ID | fhir.observation.partOf|
||mimic.datetimeevents.ITEMID | fhir.observation.code|
||mimic.datetimeevents.CHARTTIME | fhir.observation.effectiveDateTime|
||mimic.datetimeevents.CGID | fhir.observation.performer|
||mimic.datetimeevents.VALUE | fhir.observation.value|
||mimic.datetimeevents.VALUEUOM | fhir.observation.unit|
||mimic.datetimeevents.WARNING | fhir.observation.interpretation|
||mimic.datetimeevents.RESULTSTATUS | fhir.observation.status|
||mimic.d_items.(LABEL+DBSOURCE+PARAM_TYPE) | fhir.observation.note|
||mimic.d_items.CATEGORY | fhir.observation.category_sub|
||'datetimeevents' | fhir.observation.category|



In [5]:
def transform_datetimeevents(data_path, output_path):
    """Load DATETIMEEVENTS, join D_ITEMS and return it in observation layout.

    Parameters
    ----------
    data_path : str
        Directory prefix (including the trailing separator) of the source
        tables.
    output_path : str
        Not used by this function; the caller writes the returned frame.

    Returns
    -------
    pandas.DataFrame
        One row per DATETIMEEVENTS row whose ITEMID is present in D_ITEMS
        (inner join), with the observation column names and order.

    Notes
    -----
    Reads the notebook-level ``file_ext`` and ``compression`` settings.
    """
    events_file = data_path + 'DATETIMEEVENTS' + file_ext
    items_file = data_path + 'D_ITEMS' + file_ext

    # STORETIME is left out; VALUE holds a timestamp in this table, so it is
    # parsed as a date alongside CHARTTIME.
    events = pd.read_csv(
        events_file,
        compression=compression,
        usecols=['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID',
                 'ITEMID', 'CHARTTIME', 'CGID', 'VALUE',
                 'VALUEUOM', 'WARNING', 'ERROR', 'RESULTSTATUS',
                 'STOPPED'],
        dtype={'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': float,
               'ICUSTAY_ID': float, 'ITEMID': int, 'CGID': float,
               'VALUEUOM': str, 'WARNING': float, 'ERROR': float,
               'RESULTSTATUS': str, 'STOPPED': str},
        parse_dates=['CHARTTIME', 'VALUE'],
    )

    # 'ABBREVIATION', 'LINKSTO', 'CONCEPTID' and 'UNITNAME' are not needed.
    items = pd.read_csv(
        items_file,
        compression=compression,
        index_col=0,
        usecols=['ROW_ID', 'ITEMID', 'LABEL', 'DBSOURCE', 'CATEGORY', 'PARAM_TYPE'],
        dtype={'ROW_ID': int, 'ITEMID': int, 'LABEL': str, 'DBSOURCE': 'category',
               'CATEGORY': 'category', 'PARAM_TYPE': str},
    )

    merged = events.merge(items, on='ITEMID')

    # note = "<LABEL> <DBSOURCE> <PARAM_TYPE>"; a missing LABEL/DBSOURCE
    # becomes 'NA', a missing PARAM_TYPE becomes an empty string.
    label_and_source = merged['LABEL'].str.cat(merged['DBSOURCE'], sep=' ', na_rep='NA')
    merged['note'] = label_and_source.str.cat(merged['PARAM_TYPE'], sep=' ', na_rep='')

    # Fold STOPPED and ERROR into the status column; ERROR is applied last,
    # so it takes precedence when both conditions hold.
    was_discontinued = merged['STOPPED'] == "D/C'd"
    has_error = merged['ERROR'] == 1
    merged.loc[was_discontinued, 'RESULTSTATUS'] = 'discharged'
    merged.loc[has_error, 'RESULTSTATUS'] = 'Error'

    # Constant marker identifying the source table of these observations.
    merged['category'] = 'datetimeevents'

    helper_columns = ['LABEL', 'PARAM_TYPE', 'ERROR', 'DBSOURCE', 'STOPPED']
    fhir_names = {
        'ROW_ID': 'identifier',
        'SUBJECT_ID': 'subject',
        'HADM_ID': 'encounter',
        'ICUSTAY_ID': 'partOf',
        'ITEMID': 'code',
        'CGID': 'performer',
        'CHARTTIME': 'effectiveDateTime',
        'VALUE': 'value',
        'VALUEUOM': 'unit',
        'WARNING': 'interpretation',
        'RESULTSTATUS': 'status',
        'CATEGORY': 'category_sub',
    }
    final_order = ['identifier', 'subject', 'encounter', 'partOf', 'code',
                   'effectiveDateTime', 'performer', 'value', 'unit',
                   'interpretation', 'status', 'note', 'category_sub', 'category']

    renamed = merged.drop(columns=helper_columns).rename(columns=fhir_names)
    return renamed.reindex(columns=final_order)

# Convert DATETIMEEVENTS and preview the first rows of the result.
observation_dte = transform_datetimeevents(data_path=data_path, output_path=output_path)
observation_dte.head()

Unnamed: 0,identifier,subject,encounter,partOf,code,effectiveDateTime,performer,value,unit,interpretation,status,note,category_sub,category
0,711,7657,121183.0,297945.0,3411,2172-03-14 11:00:00,16446.0,,Date,,,Equip Change [MM] carevue,,datetimeevents
1,712,7657,121183.0,297945.0,3411,2172-03-14 13:00:00,16446.0,,Date,,,Equip Change [MM] carevue,,datetimeevents
2,713,7657,121183.0,297945.0,3411,2172-03-14 15:00:00,14957.0,,Date,,,Equip Change [MM] carevue,,datetimeevents
3,714,7657,121183.0,297945.0,3411,2172-03-14 17:00:00,16446.0,,Date,,,Equip Change [MM] carevue,,datetimeevents
4,715,7657,121183.0,297945.0,3411,2172-03-14 19:00:00,14815.0,,Date,,,Equip Change [MM] carevue,,datetimeevents


In [6]:
# Write the converted table as gzip; level 1 is the fastest gzip setting.
observation_dte.to_csv(
    output_path + 'observation_dte.csv.gz',
    index=False,
    compression={'method': 'gzip', 'compresslevel': 1},
)

In [7]:
# Drop the exported frame and force a collection so its memory is released
# before the following cells run.
del observation_dte
gc.collect()

17

## LABEVENTS MAPPING:<br>

- Consider mapping `LOINC_CODE` to `code` rather than to `method`. `LOINC_CODE` would first need to be assigned, which isn't straightforward.

||Original format | FHIR resource format|
|------|:-----|:-----|
||mimic.labevents.ROW_ID|fhir.observation.identifier|
||mimic.labevents.SUBJECT_ID|fhir.observation.subject|
||mimic.labevents.HADM_ID|fhir.observation.encounter|
||mimic.labevents.CHARTTIME|fhir.observation.effectiveDateTime|
||mimic.labevents.ITEMID | fhir.observation.code|
||mimic.d_labitems.LOINC_CODE | fhir.observation.code_loinc|
||mimic.labevents.VALUE | fhir.observation.value|
||mimic.labevents.VALUENUM | fhir.observation.value_quantity|
||mimic.labevents.VALUEUOM | fhir.observation.unit|
||mimic.labevents.FLAG | fhir.observation.interpretation|
||mimic.d_labitems.(LABEL+FLUID) | fhir.observation.note|
||mimic.d_labitems.CATEGORY | fhir.observation.category_sub|
||'labevents' | fhir.observation.category|

In [14]:
from pathlib import Path
import time
import gc


def transform_labevents(data_path, output_path, chunksize=10**6):
    """Convert LABEVENTS to the observation layout, one chunk at a time.

    Each chunk is inner-joined with D_LABITEMS on ITEMID, reshaped to the
    observation columns and appended to ``observation_le.csv.gz`` under
    ``output_path``. The function returns nothing; the gzip file is its
    only result.

    Parameters
    ----------
    data_path : str
        Directory prefix (including the trailing separator) of the source
        tables.
    output_path : str
        Directory prefix (including the trailing separator) for the output.
    chunksize : int, default 10**6
        Number of LABEVENTS rows processed per chunk. The original author
        notes that this table is surprisingly memory-hungry (~5.5GB when
        loaded at once), hence the chunked processing.

    Notes
    -----
    Reads the notebook-level ``file_ext`` and ``compression`` settings.
    Because the join is an inner join, rows whose ITEMID is absent from
    D_LABITEMS are not written.
    """
    output_filename = output_path + 'observation_le.csv.gz'
    # Chunks are appended below, so a file left over from an earlier run
    # has to be removed first.
    Path(output_filename).unlink(missing_ok=True)

    d_labitems = pd.read_csv(data_path + 'D_LABITEMS' + file_ext, compression=compression, index_col=0,
                             usecols=['ROW_ID', 'ITEMID', 'LABEL', 'FLUID', 'CATEGORY', 'LOINC_CODE'],
                             dtype={'ROW_ID': int, 'ITEMID': int, 'LABEL': str,
                                    'FLUID': str, 'CATEGORY': str, 'LOINC_CODE': str})

    # HADM_ID is read as float so that rows without an admission keep a
    # missing value instead of failing the integer conversion.
    chunk_container = pd.read_csv(data_path + 'LABEVENTS' + file_ext, compression=compression,
                                  usecols=['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ITEMID',
                                           'CHARTTIME', 'VALUE', 'VALUENUM', 'VALUEUOM', 'FLAG'],
                                  dtype={'ROW_ID': int, 'SUBJECT_ID': int, 'HADM_ID': float,
                                         'ITEMID': int, 'VALUE': str, 'VALUENUM': float,
                                         'VALUEUOM': str, 'FLAG': str},
                                  parse_dates=['CHARTTIME'],
                                  chunksize=chunksize)

    fhir_names = {'ROW_ID': 'identifier',
                  'SUBJECT_ID': 'subject',
                  'HADM_ID': 'encounter',
                  'ITEMID': 'code',
                  'LOINC_CODE': 'code_loinc',
                  'CHARTTIME': 'effectiveDateTime',
                  'VALUE': 'value',
                  'VALUENUM': 'value_quantity',
                  'VALUEUOM': 'unit',
                  'FLAG': 'interpretation',
                  'CATEGORY': 'category_sub'}
    observation_columns = ['identifier', 'subject', 'encounter', 'effectiveDateTime',
                           'code', 'code_loinc', 'value', 'value_quantity', 'unit',
                           'interpretation', 'note', 'category_sub', 'category']

    for i, labevents in enumerate(chunk_container):
        # Show progress (~28M rows in total)
        print(f'{i + 1}/{28*10**6 / chunksize}', flush=True, end =" ")
        start_time = time.time()

        observation_le = pd.merge(labevents, d_labitems, on='ITEMID')

        # note = "<LABEL> <FLUID>"; a missing part is written as 'NA'.
        observation_le['note'] = observation_le['LABEL'].str.cat(observation_le['FLUID'],
                                                                 sep=' ', na_rep='NA')
        # Constant marker identifying the source table of these observations
        observation_le['category'] = 'labevents'

        # reindex both orders the output columns and discards the helper
        # columns (LABEL, FLUID).
        observation_le = (observation_le
                          .rename(columns=fhir_names)
                          .reindex(columns=observation_columns))

        # Only the first chunk writes the header row; otherwise every
        # appended chunk would repeat it in the middle of the data.
        observation_le.to_csv(output_filename, compression={'method': 'gzip', 'compresslevel': 1},
                              index=False, mode='a', header=(i == 0))
        # force free mem, for some reasons without it, RAM ends pretty quick
        gc.collect()
        # show execution time per chunk
        print(f"--- {time.time() - start_time} seconds ---", flush=True)

        
# The result is streamed to observation_le.csv.gz; the function returns
# nothing, so there is no frame to preview here.
transform_labevents(data_path=data_path, output_path=output_path)

1/28.0 --- 6.980336904525757 seconds ---
2/28.0 --- 7.179809093475342 seconds ---
3/28.0 --- 7.201792240142822 seconds ---
4/28.0 --- 7.046716928482056 seconds ---
5/28.0 --- 7.200324773788452 seconds ---
6/28.0 --- 7.3242011070251465 seconds ---
7/28.0 --- 7.068134069442749 seconds ---
8/28.0 --- 7.024747610092163 seconds ---
9/28.0 --- 7.105806827545166 seconds ---
10/28.0 --- 7.503292083740234 seconds ---
11/28.0 --- 7.739151954650879 seconds ---
12/28.0 --- 7.523775815963745 seconds ---
13/28.0 --- 7.35314154624939 seconds ---
14/28.0 --- 7.229565858840942 seconds ---
15/28.0 --- 6.952656030654907 seconds ---
16/28.0 --- 7.112035512924194 seconds ---
17/28.0 --- 7.4885265827178955 seconds ---
18/28.0 --- 6.993064641952515 seconds ---
19/28.0 --- 6.979121208190918 seconds ---
20/28.0 --- 6.980049133300781 seconds ---
21/28.0 --- 7.230544805526733 seconds ---
22/28.0 --- 7.312502384185791 seconds ---
23/28.0 --- 7.299519300460815 seconds ---
24/28.0 --- 7.856963872909546 seconds ---


In [15]:
# Spot-check the export by reading back only its first ten rows.
pd.read_csv(output_path + 'observation_le.csv.gz', nrows=10)

Unnamed: 0,identifier,subject,encounter,effectiveDateTime,code,code_loinc,value,value_quantity,unit,interpretation,note,category_sub,category
0,281,3,,2101-10-12 16:07:00,50820,11558-4,7.39,7.39,units,,pH Blood,Blood Gas,labevents
1,289,3,,2101-10-12 18:17:00,50820,11558-4,7.42,7.42,units,,pH Blood,Blood Gas,labevents
2,396,3,,2101-10-15 12:42:00,50820,11558-4,7.49,7.49,units,,pH Blood,Blood Gas,labevents
3,976,3,145834.0,2101-10-22 04:31:00,50820,11558-4,7.4,7.4,units,,pH Blood,Blood Gas,labevents
4,989,3,145834.0,2101-10-22 07:13:00,50820,11558-4,7.37,7.37,units,,pH Blood,Blood Gas,labevents
5,998,3,145834.0,2101-10-22 10:16:00,50820,11558-4,7.37,7.37,units,,pH Blood,Blood Gas,labevents
6,1007,3,145834.0,2101-10-22 11:21:00,50820,11558-4,7.39,7.39,units,,pH Blood,Blood Gas,labevents
7,1020,3,145834.0,2101-10-22 13:02:00,50820,11558-4,7.39,7.39,units,,pH Blood,Blood Gas,labevents
8,1030,3,145834.0,2101-10-22 15:59:00,50820,11558-4,7.41,7.41,units,,pH Blood,Blood Gas,labevents
9,1044,3,145834.0,2101-10-22 16:02:00,50820,11558-4,7.36,7.36,units,,pH Blood,Blood Gas,labevents


In [10]:
# import sys
# sys.getsizeof(labevents) / 1024 / 1024 / 1024

5.434226114302874