In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Sub-directory of the eICU data root to read from; it is concatenated into
# the lab.csv path when the table is loaded.
database_type = "/full"

The `lab` table provides laboratory tests that have been mapped to a standard set of measurements.
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

#### Reading in the lab dataset and keeping the columns relevant to our study.

In [4]:
# Load only the fields this notebook works with: the stay identifier, the
# result offset, the lab name and the lab result.
columns = [
    'patientunitstayid',
    'labresultoffset',
    'labname',
    'labresult',
]
lab_csv = '../../eICU' + database_type + '/lab.csv'
lab = pd.read_csv(lab_csv, usecols=columns)
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
0,141168,2026,fibrinogen,177.00
1,141168,1133,PT - INR,2.50
2,141168,2026,magnesium,2.00
3,141168,1133,PT,26.60
4,141168,2141,pH,7.20
...,...,...,...,...
39132526,3353263,-7,WBC x 1000,6.40
39132527,3353263,1733,RBC,4.67
39132528,3353263,-7,-monos,10.00
39132529,3353263,1733,WBC x 1000,6.60


#### Keeping lab results relevant to diagnosing sepsis

In [5]:
# Relabel 'urinary creatinine' rows as 'creatinine'.
# The match is literal (regex=False) and missing lab names count as
# non-matches (na=False), so a NaN in `labname` cannot produce a non-boolean
# mask that `.loc` would reject.
# NOTE(review): this pools urine and serum creatinine under one name --
# confirm that is intended for the sepsis features.
is_urinary_creatinine = lab['labname'].str.contains(
    "urinary creatinine", regex=False, na=False
)
lab.loc[is_urinary_creatinine, 'labname'] = "creatinine"

# Only keep rows whose labname is 'WBC x 1000', 'lactate' or 'creatinine'.
lab_name = ['WBC x 1000', 'lactate', 'creatinine']
lab = lab[lab.labname.isin(lab_name)]

#### Transposing Lab Names and their results into columns.

In [6]:
def _lab_as_column(labs, name, column):
    """Return the rows of `labs` for one lab test, with the result in its own column.

    Parameters
    ----------
    labs : pd.DataFrame
        Frame with at least `labname` and `labresult` columns.
    name : str
        Lab name to select; a row is kept when its `labname` contains this text.
    column : str
        Name of the new column that receives the `labresult` values.

    Returns
    -------
    pd.DataFrame
        Copy of the selected rows with `labname` and `labresult` removed and
        `column` added. A selected row whose `labname` is not exactly `name`
        gets -1 in `column`.
    """
    # Literal, NaN-safe substring match; .copy() so the new column is written
    # to an independent frame rather than a slice of `labs`.
    selected = labs.loc[labs['labname'].str.contains(name, regex=False, na=False)].copy()
    # Vectorised equivalent of the per-row "labresult if labname == name else -1";
    # avoids a Python-level loop over every row and also works on an empty subset.
    selected[column] = selected['labresult'].where(selected['labname'] == name, -1)
    return selected.drop(['labname', 'labresult'], axis=1)


# Turn labname/labresult into one value column per lab test
labc = _lab_as_column(lab, 'creatinine', 'creatinine')
labw = _lab_as_column(lab, 'WBC x 1000', 'wbcx1000')
labl = _lab_as_column(lab, 'lactate', 'lactate')

#### Merging Lab Dataframes into Final Lab Dataset

In [7]:
# Merge the per-test frames on (patientunitstayid, labresultoffset).
# NOTE(review): creatinine is the left table of both left joins, so WBC and
# lactate results at offsets with no creatinine row are not carried over --
# confirm this is intended.
merge_keys = ['patientunitstayid', 'labresultoffset']
finallab = labc.merge(labw, how='left', on=merge_keys)
finallab = finallab.merge(labl, how='left', on=merge_keys)
# drop_duplicates() returns a new frame; assign it back, otherwise the call
# has no effect and duplicate rows are kept.
finallab = finallab.drop_duplicates()
finallab

Unnamed: 0,patientunitstayid,labresultoffset,creatinine,wbcx1000,lactate
0,141168,1701,173.12,,
1,141168,2026,2.95,19.8,12.2
2,141168,1133,2.30,14.7,
3,141168,516,1.95,9.8,
4,141178,-280,0.70,7.6,
...,...,...,...,...,...
1292236,3353254,5558,1.67,11.7,
1292237,3353254,4144,1.61,,
1292238,3353254,-256,2.38,13.5,
1292239,3353263,-7,1.06,6.5,


#### Saving the Cleaned Lab DataFrame to a `csv` file

In [8]:
# Write the merged lab frame to the training directory as a comma-separated,
# UTF-8 encoded file without the index column.
path = "../../eICU/training/"
lab_out = path + "lab.csv"
finallab.to_csv(lab_out, sep=',', index=False, encoding='utf-8')
