In [1]:

%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Dataset variant to load: a sub-path (note the leading slash) that is spliced
# between '../../eICU' and '/lab.csv' when the lab table is read below.
database_type = "/demo"

The `lab` table provides laboratory tests that have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

#### Reading in the lab dataset and keeping only the columns relevant to our study.

In [4]:
# Only these four columns are loaded from lab.csv; every other column in the
# file is skipped at parse time.
columns = [
    'patientunitstayid',
    'labresultoffset',
    'labname',
    'labresult',
]

# The file location is assembled from the fixed prefix, the configured
# dataset variant and the file name.
lab_csv_path = ''.join(['../../eICU', database_type, '/lab.csv'])
lab = pd.read_csv(lab_csv_path, usecols=columns)
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
0,1754323,-647,Hct,38.30
1,1754323,-647,platelets x 1000,181.00
2,1754323,-647,RBC,4.86
3,1754323,-647,-monos,8.70
4,1754323,-647,MCHC,30.40
...,...,...,...,...
434655,2754778,2041,bedside glucose,91.00
434656,2754778,37,troponin - I,5.92
434657,2754778,37,MCV,92.00
434658,2754778,236,bedside glucose,119.00


#### Keeping lab results relevant to diagnosing sepsis

In [5]:
# Relabel 'urinary creatinine' rows as 'creatinine' so they are kept by the
# filter below and treated as creatinine results from here on.
# - regex=False: the pattern is a literal substring, not a regular expression.
# - na=False: a missing labname counts as "no match", so the mask is strictly
#   boolean (.loc raises on a mask that contains NaN).
is_urinary_creatinine = lab['labname'].str.contains(
    "urinary creatinine", regex=False, na=False
)
lab.loc[is_urinary_creatinine, 'labname'] = "creatinine"

# Only keep rows whose labname is 'WBC x 1000', 'lactate' or 'creatinine'.
# .copy() makes the result an independent frame, so later column assignments
# on it (or on frames derived from it) do not raise SettingWithCopyWarning.
lab_name = ['WBC x 1000', 'lactate', 'creatinine']
lab = lab[lab['labname'].isin(lab_name)].copy()

#### Transposing Lab Names and their results into columns.

In [6]:
def _lab_as_column(lab_df, lab_name, column):
    """Return the rows of ``lab_df`` for one lab test, with its result in ``column``.

    Parameters
    ----------
    lab_df : pandas.DataFrame
        Long-format lab frame with at least 'labname' and 'labresult' columns.
    lab_name : str
        Exact value of 'labname' to select.
    column : str
        Name given to the 'labresult' column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        A new frame (not a view of ``lab_df``) holding only the rows where
        ``labname == lab_name``, without the 'labname' column and with
        'labresult' renamed to ``column``. Empty, but with the same columns,
        when no row matches.
    """
    is_selected = lab_df['labname'] == lab_name
    return (
        lab_df.loc[is_selected]
        .drop(columns='labname')
        .rename(columns={'labresult': column})
    )


# One frame per lab test, each carrying that test's result in its own column.
# This is a plain vectorised select + rename: once the rows are selected by
# labname, the result column is simply 'labresult' under a new name, so no
# row-wise apply (and no -1 placeholder) is needed. It also works when a test
# has no rows at all.
labc = _lab_as_column(lab, 'creatinine', 'creatinine')
labw = _lab_as_column(lab, 'WBC x 1000', 'wbcx1000')
labl = _lab_as_column(lab, 'lactate', 'lactate')

#### Merging Lab Dataframes into Final Lab Dataset

In [7]:
# Combine the three per-test frames into one wide frame keyed on
# (patientunitstayid, labresultoffset).
#
# The joins are left joins anchored on the creatinine frame: a key pair only
# appears in the result if it has a creatinine row, and WBC / lactate values
# are attached where the same key pair exists in those frames (NaN otherwise).
# NOTE(review): WBC / lactate rows at key pairs without a creatinine row are
# not carried over -- confirm this is intended (how='outer' would keep them).
merge_keys = ['patientunitstayid', 'labresultoffset']

finallab = labc
for other_labs in (labw, labl):
    finallab = finallab.merge(other_labs, how='left', on=merge_keys)

# Remove rows that are identical across every column.
finallab = finallab.drop_duplicates()
finallab

Unnamed: 0,patientunitstayid,labresultoffset,creatinine,wbcx1000,lactate
0,1754323,-647,0.90,5.4,
1,1754323,603,0.94,12.8,
2,1754323,3579,0.74,,
3,1754323,2113,0.87,9.5,
4,1754323,5073,0.87,8.4,
...,...,...,...,...,...
14764,2715137,1951,0.60,,
14765,2715137,-29,0.60,,
14766,2715137,1001,0.60,,
14767,2754778,1282,1.05,6.7,


#### Saving the Cleaned Lab DataFrame to a `csv` file

In [8]:
# Write the merged lab frame as a comma-separated, UTF-8 encoded file without
# the DataFrame index.
path = "../../eICU/training/"
output_file = path + "lab.csv"
finallab.to_csv(
    output_file,
    index=False,
    sep=',',
    encoding='utf-8',
)