In [1]:
# Fixed seed for the notebook; the next cell passes it to np.random.seed and random.seed.
seed = 3906303

In [2]:
import numpy as np
import pandas as pd
import gzip
import random
# Seed both NumPy's global RNG and Python's `random` module with the same value.
np.random.seed(seed)
random.seed(seed)

In [2]:
hosp_file_path = 'mimic-iv-3.1/mimic-iv-3.1/hosp/labevents.csv.gz'

# Stream the gzip file line by line so the whole table is never held in memory.
# Fix: the original opened the undefined name `file_path`, which raises NameError
# on a fresh kernel; the path defined in this cell is `hosp_file_path`.
# Note: this counts physical lines (header included), not parsed CSV records.
with gzip.open(hosp_file_path, 'rt') as f:
    row_count = sum(1 for _ in f)

print(f"Total number of rows (including header): {row_count}")

Total number of rows (including header): 158374765


In [3]:
icu_file_path = 'mimic-iv-3.1/mimic-iv-3.1/icu/chartevents.csv.gz'

# Stream the gzip file line by line so the whole table is never held in memory.
# Fix: the original opened the undefined name `file_path`, which raises NameError
# on a fresh kernel; the path defined in this cell is `icu_file_path`.
# Note: this counts physical lines (header included), not parsed CSV records.
with gzip.open(icu_file_path, 'rt') as f:
    row_count = sum(1 for _ in f)

print(f"Total number of rows (including header): {row_count}")

Total number of rows (including header): 432997492


## The cell below creates a file with all blood glucose measurements from the ICU

In [10]:
icu_file_path = 'mimic-iv-3.1/mimic-iv-3.1/icu/chartevents.csv.gz'
output_path = "bg_filtered_chartevents.csv"

# itemids of the four chartevents blood glucose measurements used to predict
# dysglycemia. Per d_items they are: 220621 Glucose (serum), 225664 Glucose finger
# stick, 226537 Glucose (whole blood), 228388 Glucose (whole blood) (soft).
# Kept as strings because they are compared against raw CSV text, and as a set so
# the membership test is not rebuilt/scanned for every one of the input lines.
bg_itemids = {'220621', '225664', '226537', '228388'}

with gzip.open(icu_file_path, mode="rt", encoding="utf-8") as infile, \
     open(output_path, "w", encoding="utf-8") as outfile:

    header = next(infile)  # read and write header
    outfile.write(header)

    # Locate the itemid column from the header instead of hard-coding position 6,
    # so a change in column order fails loudly (ValueError) rather than silently
    # filtering on the wrong field.
    column_names = [name.strip().strip('"') for name in header.split(',')]
    itemid_idx = column_names.index('itemid')

    for line in infile:
        # Split only as far as the itemid field; the remaining columns (including
        # free-text `value`) are never needed to make the decision.
        if line.split(',', itemid_idx + 1)[itemid_idx] in bg_itemids:
            outfile.write(line)

In [12]:
# Load the glucose-only chartevents file written to `output_path` by the filtering cell.
df_icu_filtered = pd.read_csv(output_path)
# Trailing expression: the notebook displays the frame (pandas truncates long frames).
df_icu_filtered

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
0,10000032,29079034,39553978,,2180-07-23 21:45:00,2180-07-23 22:30:00,220621,115.0,115.0,mg/dL,1
1,10000690,25860671,37081114,84407.0,2150-11-04 10:00:00,2150-11-04 09:39:00,225664,117.0,117.0,,0
2,10000690,25860671,37081114,,2150-11-03 02:56:00,2150-11-03 04:22:00,220621,77.0,77.0,mg/dL,0
3,10000690,25860671,37081114,,2150-11-04 03:03:00,2150-11-04 03:44:00,220621,84.0,84.0,mg/dL,0
4,10000690,25860671,37081114,,2150-11-04 17:54:00,2150-11-04 18:41:00,220621,120.0,120.0,mg/dL,1
...,...,...,...,...,...,...,...,...,...,...,...
1814457,19999987,23865745,36195440,68756.0,2145-11-04 02:00:00,2145-11-04 05:07:00,225664,112.0,112.0,,0
1814458,19999987,23865745,36195440,69532.0,2145-11-03 08:00:00,2145-11-03 07:58:00,225664,122.0,122.0,,0
1814459,19999987,23865745,36195440,96092.0,2145-11-04 08:00:00,2145-11-04 08:11:00,225664,127.0,127.0,,0
1814460,19999987,23865745,36195440,,2145-11-03 01:35:00,2145-11-03 02:42:00,220621,113.0,113.0,mg/dL,1


In [14]:
# Work on a copy so later column conversions do not modify df_icu_filtered.
bg_icu = df_icu_filtered.copy()

In [20]:
# Convert both timestamp columns from text to datetime64 using one explicit format
# (e.g. 2180-07-23 21:45:00). errors='raise' aborts on any value that does not
# match; switch to 'coerce' to turn bad rows into NaT instead.
for time_col in ('charttime', 'storetime'):
    bg_icu[time_col] = pd.to_datetime(
        bg_icu[time_col],
        format='%Y-%m-%d %H:%M:%S',
        errors='raise',
    )

In [21]:
# Preview the first five rows after the timestamp conversion.
bg_icu.head()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
0,10000032,29079034,39553978,,2180-07-23 21:45:00,2180-07-23 22:30:00,220621,115.0,115.0,mg/dL,1
1,10000690,25860671,37081114,84407.0,2150-11-04 10:00:00,2150-11-04 09:39:00,225664,117.0,117.0,,0
2,10000690,25860671,37081114,,2150-11-03 02:56:00,2150-11-03 04:22:00,220621,77.0,77.0,mg/dL,0
3,10000690,25860671,37081114,,2150-11-04 03:03:00,2150-11-04 03:44:00,220621,84.0,84.0,mg/dL,0
4,10000690,25860671,37081114,,2150-11-04 17:54:00,2150-11-04 18:41:00,220621,120.0,120.0,mg/dL,1


In [22]:
# Display the rows ordered by patient and chart time. The sorted frame is only
# shown, not assigned, so bg_icu itself keeps its original row order.
bg_icu.sort_values(by=['subject_id', 'charttime'])

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
0,10000032,29079034,39553978,,2180-07-23 21:45:00,2180-07-23 22:30:00,220621,115.0,115.0,mg/dL,1
2,10000690,25860671,37081114,,2150-11-03 02:56:00,2150-11-03 04:22:00,220621,77.0,77.0,mg/dL,0
3,10000690,25860671,37081114,,2150-11-04 03:03:00,2150-11-04 03:44:00,220621,84.0,84.0,mg/dL,0
1,10000690,25860671,37081114,84407.0,2150-11-04 10:00:00,2150-11-04 09:39:00,225664,117.0,117.0,,0
4,10000690,25860671,37081114,,2150-11-04 17:54:00,2150-11-04 18:41:00,220621,120.0,120.0,mg/dL,1
...,...,...,...,...,...,...,...,...,...,...,...
1814458,19999987,23865745,36195440,69532.0,2145-11-03 08:00:00,2145-11-03 07:58:00,225664,122.0,122.0,,0
1814456,19999987,23865745,36195440,68756.0,2145-11-03 20:00:00,2145-11-03 20:00:00,225664,105.0,105.0,,0
1814457,19999987,23865745,36195440,68756.0,2145-11-04 02:00:00,2145-11-04 05:07:00,225664,112.0,112.0,,0
1814461,19999987,23865745,36195440,,2145-11-04 05:01:00,2145-11-04 05:51:00,220621,100.0,100.0,mg/dL,0


In [23]:
# Summary statistics of the numeric columns. Fix: the original `bg_icu['']` looks up
# a column named '' and raises KeyError; the output recorded for this cell is a
# describe() table, so describe() is the intended call.
bg_icu.describe()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,itemid,value,valuenum,warning
count,1814462.0,1814462.0,1814462.0,1048430.0,1814462.0,1814462.0,1814462.0,1814462.0
mean,15001620.0,24996190.0,34975570.0,47819.17,224137.0,209.1426,209.1426,0.359274
std,2893341.0,2868619.0,2891540.0,28525.88,2439.564,7727.934,7727.934,0.4797878
min,10000030.0,20000090.0,30000150.0,45.0,220621.0,-124.0,-124.0,0.0
25%,12492850.0,22502050.0,32470110.0,21320.0,220621.0,110.0,110.0,0.0
50%,15019290.0,25022700.0,34966940.0,46354.0,225664.0,135.0,135.0,0.0
75%,17516320.0,27446330.0,37470370.0,72447.0,225664.0,172.0,172.0,1.0
max,19999990.0,29999830.0,39999860.0,99923.0,226537.0,1653550.0,1653550.0,1.0


In [17]:
# Show column dtypes and memory usage of bg_icu.
bg_icu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1814462 entries, 0 to 1814461
Data columns (total 11 columns):
 #   Column        Dtype         
---  ------        -----         
 0   subject_id    int64         
 1   hadm_id       int64         
 2   stay_id       int64         
 3   caregiver_id  float64       
 4   charttime     datetime64[ns]
 5   storetime     object        
 6   itemid        int64         
 7   value         float64       
 8   valuenum      float64       
 9   valueuom      object        
dtypes: datetime64[ns](1), float64(3), int64(5), object(2)
memory usage: 152.3+ MB


In [13]:
# Summary statistics of the numeric columns; the min/max of valuenum expose
# implausible glucose readings (e.g. negative values) that need cleaning.
df_icu_filtered.describe()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,itemid,value,valuenum,warning
count,1814462.0,1814462.0,1814462.0,1048430.0,1814462.0,1814462.0,1814462.0,1814462.0
mean,15001620.0,24996190.0,34975570.0,47819.17,224137.0,209.1426,209.1426,0.359274
std,2893341.0,2868619.0,2891540.0,28525.88,2439.564,7727.934,7727.934,0.4797878
min,10000030.0,20000090.0,30000150.0,45.0,220621.0,-124.0,-124.0,0.0
25%,12492850.0,22502050.0,32470110.0,21320.0,220621.0,110.0,110.0,0.0
50%,15019290.0,25022700.0,34966940.0,46354.0,225664.0,135.0,135.0,0.0
75%,17516320.0,27446330.0,37470370.0,72447.0,225664.0,172.0,172.0,1.0
max,19999990.0,29999830.0,39999860.0,99923.0,226537.0,1653550.0,1653550.0,1.0


In [4]:
chunk_size = 500_000  # adjust based on memory
# Stop after reading first N rows
num_rows_to_read = 20_000_000
rows_read = 0
dfs = []

# Read chartevents in chunks and stop once enough rows have been collected.
# The reader is used as a context manager so the underlying gzip handle is closed
# even though the loop exits early via `break` (the original left it open).
with pd.read_csv(icu_file_path, compression='gzip', chunksize=chunk_size) as reader:
    for chunk in reader:
        dfs.append(chunk)
        rows_read += len(chunk)
        if rows_read >= num_rows_to_read:
            break

# Chunks carry consecutive row labels, so the concatenated index matches the
# row positions in the source file.
df_icu = pd.concat(dfs)

In [3]:
chunk_size = 500_000  # adjust based on memory
# Stop after reading first N rows
num_rows_to_read = 20_000_000
rows_read = 0
dfs = []

# Read labevents in chunks and stop once enough rows have been collected.
# The reader is used as a context manager so the underlying gzip handle is closed
# even though the loop exits early via `break` (the original left it open).
with pd.read_csv(hosp_file_path, compression='gzip', chunksize=chunk_size) as reader:
    for chunk in reader:
        dfs.append(chunk)
        rows_read += len(chunk)
        if rows_read >= num_rows_to_read:
            break

# Chunks carry consecutive row labels, so the concatenated index matches the
# row positions in the source file.
df_subset = pd.concat(dfs)

In [5]:
# Show five randomly drawn chartevents rows. No random_state is passed, so the
# draw presumably follows NumPy's global RNG seeded at the top — confirm.
df_icu.sample(5)

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
14570517,10339317,26032056,36320742,40490.0,2174-09-27 08:42:00,2174-09-27 09:43:00,227969,Adequate lighting,,,0.0
19506339,10459005,25159727,32245271,23731.0,2142-08-18 07:30:00,2142-08-18 07:31:00,224055,Rarely Moist,4.0,,0.0
8503415,10209126,23279996,39390511,78695.0,2116-03-08 16:00:00,2116-03-08 16:48:00,220045,87,87.0,bpm,0.0
12276695,10287015,27614346,37924967,37604.0,2170-02-11 08:00:00,2170-02-11 11:19:00,225664,202,202.0,,0.0
7348224,10176741,28781268,37423816,87240.0,2117-09-28 17:30:00,2117-09-28 17:38:00,229108,Yes,,,0.0


In [9]:
# Show five randomly drawn labevents rows. No random_state is passed, so the
# draw presumably follows NumPy's global RNG seeded at the top — confirm.
df_subset.sample(5)

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
14570517,14632849,10947245,24307798.0,64557878,51248,,2136-10-04 19:48:00,2136-10-04 20:17:00,28.8,28.8,pg,27.0,32.0,,STAT,
19506339,19589455,11255988,28595371.0,38661105,51200,,2152-07-07 00:00:00,2152-07-07 02:20:00,12.0,12.0,%,1.0,7.0,abnormal,ROUTINE,
8503415,8540196,10564151,,24208270,50863,P61XZH,2163-04-02 11:00:00,2163-04-02 12:15:00,327.0,327.0,IU/L,40.0,130.0,abnormal,STAT,
12276695,12329430,10805461,22063602.0,23389992,50983,,2142-12-23 02:45:00,2142-12-23 04:12:00,139.0,139.0,mEq/L,133.0,145.0,,ROUTINE,
7348224,7379835,10489424,28337031.0,89322675,50868,,2135-07-21 04:55:00,2135-07-21 06:44:00,17.0,17.0,mEq/L,10.0,18.0,,ROUTINE,


In [12]:
# Load the hospital lab-item dictionary (d_labitems) used to look up labevents itemids.
df_labdict = pd.read_csv('mimic-iv-3.1/mimic-iv-3.1/hosp/d_labitems.csv.gz', compression='gzip')
# Display (rows, columns).
df_labdict.shape

(1650, 4)

In [10]:
# Load the ICU item dictionary (d_items) used to look up chartevents itemids.
df_icu_labdict = pd.read_csv('mimic-iv-3.1/mimic-iv-3.1/icu/d_items.csv.gz', compression='gzip')
# Display (rows, columns).
df_icu_labdict.shape

(4095, 9)

In [4]:
# List every ICU item whose label mentions "glucose" (case-insensitive; items with
# a missing label are excluded via na=False). Left as the cell's last expression
# instead of print(): print() renders the frame as plain text that wraps and
# truncates wide columns, while the notebook's rich display shows one table.
df_icu_labdict[df_icu_labdict['label'].str.contains('glucose', case=False, na=False)]

      itemid                                label  \
64    220395                       Glucose (ingr)   
150   220621                      Glucose (serum)   
1429  225664  Glucose finger stick (range 70-100)   
1871  226537                Glucose (whole blood)   
2058  227015                     Glucose_ApacheIV   
2059  227016                GlucoseScore_ApacheIV   
2542  227976          Boost Glucose Control (1/4)   
2543  227977          Boost Glucose Control (1/2)   
2544  227978          Boost Glucose Control (3/4)   
2545  227979         Boost Glucose Control (Full)   
2883  228388         Glucose (whole blood) (soft)   
3104  228692             Glucose Control - Prophy   

                      abbreviation           linksto  \
64                  Glucose (ingr)  ingredientevents   
150                Glucose (serum)       chartevents   
1429    Glucose FS (range 70 -100)       chartevents   
1871         Glucose (whole blood)       chartevents   
2058              Glucose_Apac

In [22]:
# List every hospital lab item whose label mentions "glucose" (case-insensitive;
# items with a missing label are excluded via na=False). Left as the cell's last
# expression instead of print() so the notebook renders it as a table.
df_labdict[df_labdict['label'].str.contains('glucose', case=False, na=False)]

      itemid                 label                fluid    category
7      50809               Glucose                Blood   Blood Gas
40     50842      Glucose, Ascites              Ascites   Chemistry
129    50931               Glucose                Blood   Chemistry
210    51022  Glucose, Joint Fluid          Joint Fluid   Chemistry
222    51034   Glucose, Body Fluid     Other Body Fluid   Chemistry
241    51053      Glucose, Pleural              Pleural   Chemistry
272    51084        Glucose, Urine                Urine   Chemistry
638    51478               Glucose                Urine  Hematology
906    51790          Glucose, CSF  Cerebrospinal Fluid   Chemistry
1032   51941        Glucose, Stool                Stool   Chemistry
1071   51981               Glucose                Urine   Chemistry
1117   52027  Glucose, Whole Blood                Blood   Blood Gas
1525   52569               Glucose                Blood   Chemistry


In [20]:
# Count how often each distinct (itemid, label, fluid, category) row occurs.
# NOTE(review): the result has fewer entries than df_labdict has rows, presumably
# because value_counts() drops rows containing NaN by default — confirm.
df_labdict.value_counts()

itemid  label                       fluid        category  
50801   Alveolar-arterial Gradient  Blood        Blood Gas     1
52005   UTX2                        Urine        Chemistry     1
52015   Xylose                      Urine        Chemistry     1
52014   Voided Specimen             Urine        Chemistry     1
52013   Vanillylmandelic Acid       Urine        Chemistry     1
                                                              ..
51383   RBC, Joint Fluid            Joint Fluid  Hematology    1
51382   Polys                       Joint Fluid  Hematology    1
51381   Other                       Joint Fluid  Hematology    1
51380   NRBC                        Joint Fluid  Hematology    1
53190   MXD%                        Blood        Chemistry     1
Length: 1646, dtype: int64

In [26]:
# Size of the labevents sample restricted to the two blood chemistry "Glucose"
# items from d_labitems (50931 and 52569).
df_subset[df_subset['itemid'].isin([50931, 52569])].shape

(458922, 16)

In [12]:
# Size of the chartevents sample restricted to the four blood glucose itemids
# (serum, finger stick, whole blood, whole blood (soft)).
df_icu[df_icu['itemid'].isin([220621, 225664, 226537, 228388])].shape

(84568, 11)

In [17]:
# Preview the first chartevents rows belonging to the four blood glucose itemids.
df_icu[df_icu['itemid'].isin([220621, 225664, 226537, 228388])].head()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
468,10000032,29079034,39553978,,2180-07-23 21:45:00,2180-07-23 22:30:00,220621,115,115.0,mg/dL,1.0
3755,10000690,25860671,37081114,84407.0,2150-11-04 10:00:00,2150-11-04 09:39:00,225664,117,117.0,,0.0
4248,10000690,25860671,37081114,,2150-11-03 02:56:00,2150-11-03 04:22:00,220621,77,77.0,mg/dL,0.0
4265,10000690,25860671,37081114,,2150-11-04 03:03:00,2150-11-04 03:44:00,220621,84,84.0,mg/dL,0.0
4276,10000690,25860671,37081114,,2150-11-04 17:54:00,2150-11-04 18:41:00,220621,120,120.0,mg/dL,1.0


In [16]:
# ICU blood glucose readings (four chartevents itemids) that are linked to a
# hospital admission, i.e. rows whose hadm_id is not missing.
icu_bg = (
    df_icu[df_icu['itemid'].isin([220621, 225664, 226537, 228388])]
    .dropna(subset=['hadm_id'])
)
icu_bg.shape

(84568, 11)

Check the blood glucose measurements (itemids 50931 and 52569) in `df_subset`, the labevents sample loaded above.

In [30]:
# Preview the first five lab rows for the two blood chemistry glucose itemids.
df_subset[df_subset['itemid'].isin([50931, 52569])].head(5)

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
0,1,10000032,,2704548,50931,P69FQC,2180-03-23 11:51:00,2180-03-23 15:56:00,___,95.0,mg/dL,70.0,100.0,,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
105,106,10000032,,95700408,50931,,2180-05-06 22:25:00,2180-05-06 23:16:00,___,109.0,mg/dL,70.0,100.0,abnormal,STAT,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
150,151,10000032,22595853.0,86271148,50931,,2180-05-07 05:05:00,2180-05-07 07:03:00,___,99.0,mg/dL,70.0,100.0,,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
176,177,10000032,,19543630,50931,P85UQ1,2180-06-03 12:00:00,2180-06-03 13:04:00,___,122.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
188,189,10000032,,58691952,50931,P69FQC,2180-06-03 12:00:00,2180-06-03 13:04:00,___,127.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."


In [29]:
# Lab blood glucose readings: rows of the labevents sample whose itemid is one of
# the two blood chemistry "Glucose" items (50931, 52569).
bg = df_subset[df_subset['itemid'].isin([50931, 52569])]

In [31]:
# Keep only glucose labs tied to a hospital admission (non-null hadm_id).
# Rebinding `bg` to its own filtered version gives the same result if re-run.
bg = bg.dropna(subset=['hadm_id'])
# Display (rows, columns) after the filter.
bg.shape

(320230, 16)

In [32]:
# Preview the first five admission-linked glucose lab rows.
bg.head()

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
150,151,10000032,22595853.0,86271148,50931,,2180-05-07 05:05:00,2180-05-07 07:03:00,___,99.0,mg/dL,70.0,100.0,,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
337,339,10000032,22841357.0,88261647,50931,,2180-06-27 05:10:00,2180-06-27 07:26:00,___,71.0,mg/dL,70.0,100.0,,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
398,400,10000032,29079034.0,55621508,50931,,2180-07-23 21:45:00,2180-07-23 22:30:00,___,115.0,mg/dL,70.0,100.0,abnormal,STAT,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
414,416,10000032,29079034.0,20101859,50931,,2180-07-24 06:35:00,2180-07-24 08:11:00,___,107.0,mg/dL,70.0,100.0,abnormal,STAT,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
446,448,10000032,29079034.0,66433308,50931,,2180-07-25 04:45:00,2180-07-25 07:44:00,___,121.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."


In [41]:
# Readings at or below 70 (the low-glucose threshold used in this notebook).
bg.loc[bg['valuenum'].le(70)]

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
3334,3344,10000690,26504700.0,75814348,50931,,2150-07-07 05:40:00,2150-07-07 08:19:00,70,70.0,mg/dL,70.0,105.0,,ROUTINE,
3485,3495,10000690,23280645.0,67246443,50931,,2150-09-20 07:20:00,2150-09-20 08:22:00,70,70.0,mg/dL,70.0,105.0,,ROUTINE,
6016,6028,10000935,29541074.0,1210266,50931,,2183-11-03 06:50:00,2183-11-03 09:10:00,68,68.0,mg/dL,70.0,105.0,abnormal,STAT,
6657,6671,10000935,25849114.0,27669265,50931,,2187-10-11 06:25:00,2187-10-11 08:27:00,___,69.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
6985,7000,10000935,25849114.0,24084896,50931,,2187-10-20 10:41:00,2187-10-20 11:58:00,___,65.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19998694,20083984,11287462,21318836.0,78547943,50931,,2146-12-09 07:15:00,2146-12-09 09:11:00,___,68.0,mg/dL,70.0,100.0,abnormal,STAT,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
19998790,20084080,11287462,21318836.0,24674004,50931,,2146-12-11 07:05:00,2146-12-11 10:35:00,___,66.0,mg/dL,70.0,100.0,abnormal,STAT,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
19998838,20084128,11287462,21318836.0,72815613,50931,,2146-12-12 06:50:00,2146-12-12 08:38:00,___,65.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
19998875,20084165,11287462,21318836.0,61184311,50931,,2146-12-13 06:50:00,2146-12-13 08:50:00,___,58.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."


In [48]:
# Spot-check one admission: its glucose readings with valuenum of at most 200.
bg.loc[bg['hadm_id'].eq(29079034) & bg['valuenum'].le(200)]

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
398,400,10000032,29079034.0,55621508,50931,,2180-07-23 21:45:00,2180-07-23 22:30:00,___,115.0,mg/dL,70.0,100.0,abnormal,STAT,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
414,416,10000032,29079034.0,20101859,50931,,2180-07-24 06:35:00,2180-07-24 08:11:00,___,107.0,mg/dL,70.0,100.0,abnormal,STAT,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."
446,448,10000032,29079034.0,66433308,50931,,2180-07-25 04:45:00,2180-07-25 07:44:00,___,121.0,mg/dL,70.0,100.0,abnormal,ROUTINE,"IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI..."


In [53]:
bg['hadm_id'].nunique()

51495

#### For each hospital admission with more than one dysglycemic reading (valuenum ≤ 70 or ≥ 270), store its number of dysglycemic readings in `mul`.

In [50]:
# Dysglycemia thresholds applied to `valuenum` (inclusive on both sides).
# NOTE(review): units are presumably mg/dL, as shown in valueuom — confirm.
HYPO_MAX = 70
HYPER_MIN = 270

# Flag every reading that is at or below the low threshold or at or above the
# high one. Missing valuenum compares False, so it is never flagged.
is_dysglycemic = (bg['valuenum'] <= HYPO_MAX) | (bg['valuenum'] >= HYPER_MIN)

# Count flagged readings per admission in a single pass. The original re-scanned
# all of `bg` once per unique hadm_id, which is quadratic in practice.
# sort=False keeps admissions in order of first appearance in `bg`, the same
# order as iterating over bg['hadm_id'].unique().
cases_per_admission = is_dysglycemic.groupby(bg['hadm_id'], sort=False).sum()

# `mul` holds, for each admission with more than one dysglycemic reading, the
# number of such readings (plain Python ints, as before).
mul = [int(cases) for cases in cases_per_admission if cases > 1]
print(len(mul))

3146


In [51]:
# Number of repeat dysglycemic readings: for every admission in `mul`, count each
# reading after the first one.
sum(cases - 1 for cases in mul)

8755

In [35]:
# Show dtypes and non-null counts per column of the admission-linked glucose labs.
bg.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 320230 entries, 150 to 19999423
Data columns (total 16 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   labevent_id        320230 non-null  int64  
 1   subject_id         320230 non-null  int64  
 2   hadm_id            320230 non-null  float64
 3   specimen_id        320230 non-null  int64  
 4   itemid             320230 non-null  int64  
 5   order_provider_id  31 non-null      object 
 6   charttime          320230 non-null  object 
 7   storetime          320230 non-null  object 
 8   value              320190 non-null  object 
 9   valuenum           320190 non-null  float64
 10  valueuom           320230 non-null  object 
 11  ref_range_lower    320230 non-null  float64
 12  ref_range_upper    320230 non-null  float64
 13  flag               220182 non-null  object 
 14  priority           320230 non-null  object 
 15  comments           287592 non-null  object 
dty

In [18]:
# Print the entire lab-item dictionary as text (to_string() renders every row,
# bypassing the notebook's usual truncation) to browse all itemids and labels.
# NOTE(review): this emits one line per dictionary row; consider clearing the
# output before sharing the notebook.
print(df_labdict.to_string())

      itemid                                       label                fluid    category
0      50801                  Alveolar-arterial Gradient                Blood   Blood Gas
1      50802                                 Base Excess                Blood   Blood Gas
2      50803         Calculated Bicarbonate, Whole Blood                Blood   Blood Gas
3      50804                        Calculated Total CO2                Blood   Blood Gas
4      50805                           Carboxyhemoglobin                Blood   Blood Gas
5      50806                       Chloride, Whole Blood                Blood   Blood Gas
6      50808                                Free Calcium                Blood   Blood Gas
7      50809                                     Glucose                Blood   Blood Gas
8      50810                      Hematocrit, Calculated                Blood   Blood Gas
9      50811                                  Hemoglobin                Blood   Blood Gas
10     508