In [1]:
import pandas as pd

The `lab` table provides Laboratory tests that have have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

#### Reading in lab dataset and keeping relevant columns to our study.

In [2]:
databasePath = "../../eICU/full/"
exportPath = "../../eICU/training/"

In [3]:
columns = ['patientunitstayid', 'labresultoffset', 'labname', 'labresult']
lab = pd.read_csv(databasePath + 'lab.csv', usecols=columns)
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
0,141168,2026,fibrinogen,177.00
1,141168,1133,PT - INR,2.50
2,141168,2026,magnesium,2.00
3,141168,1133,PT,26.60
4,141168,2141,pH,7.20
...,...,...,...,...
39132526,3353263,-7,WBC x 1000,6.40
39132527,3353263,1733,RBC,4.67
39132528,3353263,-7,-monos,10.00
39132529,3353263,1733,WBC x 1000,6.60


#### Reading in patient ids to keep

In [4]:
patientIds = pd.read_csv('../../eICU' + '/training' + '/patientIds.csv')
patientIds = patientIds['patientunitstayid'].tolist()
print('Number of unique patients: ', len(patientIds))

Number of unique patients:  173109


#### Keeping patient ids from lab table that exist in patient id file

In [5]:
lab = lab.loc[lab['patientunitstayid'].isin(patientIds)]
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
0,141168,2026,fibrinogen,177.00
1,141168,1133,PT - INR,2.50
2,141168,2026,magnesium,2.00
3,141168,1133,PT,26.60
4,141168,2141,pH,7.20
...,...,...,...,...
39132526,3353263,-7,WBC x 1000,6.40
39132527,3353263,1733,RBC,4.67
39132528,3353263,-7,-monos,10.00
39132529,3353263,1733,WBC x 1000,6.60


#### Updating patient ids with patient ids that remain from dataset

In [6]:
newPatientIds = pd.DataFrame(lab['patientunitstayid'].unique())
newPatientIds.rename(columns={0: 'patientunitstayid'}, inplace=True)
newPatientIds

Unnamed: 0,patientunitstayid
0,141168
1,141203
2,141227
3,141229
4,141266
...,...
171459,3353235
171460,3353237
171461,3353251
171462,3353254


In [7]:
newPatientIds.to_csv(exportPath + "patientIds.csv", sep=',', index=False, encoding='utf-8')

In [8]:
del newPatientIds, patientIds

#### Keeping lab results relevant to diagnosing sepsis

In [9]:
# converting labname 'urinary creatinine to 'creatinine'
lab.loc[lab['labname'].str.contains("urinary creatinine"), 'labname'] = "creatinine"

# Only keeping rows where labname is 'WBC x 1000', 'lactate', 'creatinine'
labName = ['WBC x 1000', 'lactate', 'creatinine']
lab = lab[lab.labname.isin(labName)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


#### Transposing Lab Names and their results into columns.

In [10]:
# duplicating labs to transpose labresult and labname
labc = lab.copy()
labw = lab.copy()
labl = lab.copy()

labc = labc.loc[(labc['labname'].str.contains("creatinine"))]
labw = labw.loc[(labw['labname'].str.contains("WBC x 1000"))]
labl = labl.loc[(labl['labname'].str.contains("lactate"))]

# Transpose labresults and lab name to cols
labc['creatinine'] = labc.apply(lambda x: x['labresult'] if x['labname'] == 'creatinine' else -1, axis=1)
labw['wbcx1000'] = labw.apply(lambda x: x['labresult'] if x['labname'] == 'WBC x 1000' else -1, axis=1)
labl['lactate'] = labl.apply(lambda x: x['labresult'] if x['labname'] == 'lactate' else -1, axis=1)

labc = labc.drop(['labname', 'labresult'], axis = 1)
labw = labw.drop(['labname', 'labresult'], axis = 1)
labl = labl.drop(['labname', 'labresult'], axis = 1)

#### Merging Lab Dataframes into Final Lab Dataset

In [11]:
# merge lab results dataframes
finalLab = labc.merge(labw, how = 'left', on = ['patientunitstayid', 'labresultoffset'])
finalLab = finalLab.merge(labl, how = 'left', on = ['patientunitstayid', 'labresultoffset'])
finalLab = finalLab.drop_duplicates()
finalLab

Unnamed: 0,patientunitstayid,labresultoffset,creatinine,wbcx1000,lactate
0,141168,1701,173.12,,
1,141168,2026,2.95,19.8,12.2
2,141168,1133,2.30,14.7,
3,141168,516,1.95,9.8,
4,141203,1,0.56,12.7,3.5
...,...,...,...,...,...
1198122,3353254,5558,1.67,11.7,
1198123,3353254,4144,1.61,,
1198124,3353254,-256,2.38,13.5,
1198125,3353263,-7,1.06,6.5,


#### Saving the Cleaned Patient DataFrame to a `csv` file

In [12]:
finalLab.to_csv(exportPath + "lab.csv", sep=',', index=False, encoding='utf-8')
