In [1]:
import pandas as pd

In [2]:
database_type = "/full"

The `lab` table provides Laboratory tests that have have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

#### Reading in lab dataset and keeping relevant columns to our study.

In [3]:
columns = ['patientunitstayid', 'labresultoffset', 'labname', 'labresult']
lab = pd.read_csv('../../eICU' + database_type + '/lab.csv', usecols=columns)
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
0,141168,2026,fibrinogen,177.00
1,141168,1133,PT - INR,2.50
2,141168,2026,magnesium,2.00
3,141168,1133,PT,26.60
4,141168,2141,pH,7.20
...,...,...,...,...
39132526,3353263,-7,WBC x 1000,6.40
39132527,3353263,1733,RBC,4.67
39132528,3353263,-7,-monos,10.00
39132529,3353263,1733,WBC x 1000,6.60


In [4]:
patientIds = pd.read_csv('../../eICU' + '/training' + '/patientIds.csv')
patientIds = patientIds['patientunitstayid'].tolist()

In [5]:
lab = lab.loc[lab['patientunitstayid'].isin(patientIds)]
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
815,141227,417,magnesium,2.000
816,141227,1467,lactate,1.200
817,141227,-13,-lymphs,15.000
818,141227,417,potassium,4.000
819,141227,-13,HCO3,11.000
...,...,...,...,...
39132258,3353251,4049,bedside glucose,129.000
39132259,3353251,1849,BUN,32.000
39132260,3353251,310,pH,7.194
39132261,3353251,409,potassium,4.400


In [6]:
newPatientIds = pd.DataFrame(lab['patientunitstayid'].unique())
newPatientIds.rename(columns={0: 'patientunitstayid'}, inplace=True)
newPatientIds

Unnamed: 0,patientunitstayid
0,141227
1,141288
2,141289
3,141297
4,141304
...,...
43579,3353177
43580,3353184
43581,3353197
43582,3353199


In [7]:
path = "../../eICU/training/"
newPatientIds.to_csv(path + "patientIds.csv", sep=',', index=False, encoding='utf-8')

In [8]:
del newPatientIds, patientIds

#### Keeping lab results relevant to diagnosing sepsis

In [9]:
# converting labname 'urinary creatinine to 'creatinine'
lab.loc[lab['labname'].str.contains("urinary creatinine"), 'labname'] = "creatinine"

# Only keeping rows where labname is 'WBC x 1000', 'lactate', 'creatinine'
lab_name = ['WBC x 1000', 'lactate', 'creatinine']
lab = lab[lab.labname.isin(lab_name)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


#### Transposing Lab Names and their results into columns.

In [10]:
# duplicating labs to transpose labresult and labname
labc = lab.copy()
labw = lab.copy()
labl = lab.copy()

labc = labc.loc[(labc['labname'].str.contains("creatinine"))]
labw = labw.loc[(labw['labname'].str.contains("WBC x 1000"))]
labl = labl.loc[(labl['labname'].str.contains("lactate"))]

# Transpose labresults and lab name to cols
labc['creatinine'] = labc.apply(lambda x: x['labresult'] if x['labname'] == 'creatinine' else -1, axis=1)
labw['wbcx1000'] = labw.apply(lambda x: x['labresult'] if x['labname'] == 'WBC x 1000' else -1, axis=1)
labl['lactate'] = labl.apply(lambda x: x['labresult'] if x['labname'] == 'lactate' else -1, axis=1)

labc = labc.drop(['labname', 'labresult'], axis = 1)
labw = labw.drop(['labname', 'labresult'], axis = 1)
labl = labl.drop(['labname', 'labresult'], axis = 1)

#### Merging Lab Dataframes into Final Lab Dataset

In [11]:
# merge lab results dataframes
finallab = labc.merge(labw, how = 'left', on = ['patientunitstayid', 'labresultoffset'])
finallab = finallab.merge(labl, how = 'left', on = ['patientunitstayid', 'labresultoffset'])
finallab = finallab.drop_duplicates()
finallab

Unnamed: 0,patientunitstayid,labresultoffset,creatinine,wbcx1000,lactate
0,141227,1362,1.90,42.7,
1,141227,-1566,1.40,48.2,
2,141227,-13,1.50,65.9,4.3
3,141227,937,1.60,,
4,141227,-128,1.40,47.7,
...,...,...,...,...,...
363333,3353251,-217,1.81,,
363334,3353251,5459,3.29,11.9,
363335,3353251,6868,3.35,13.0,
363336,3353251,1849,2.46,25.9,


#### Saving the Cleaned Patient DataFrame to a `csv` file

In [12]:
path = "../../eICU/training/"
finallab.to_csv(path + "lab.csv", sep=',', index=False, encoding='utf-8')
