In [1]:
import pandas as pd

The `lab` table provides Laboratory tests that have have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

#### Reading in lab dataset and keeping relevant columns to our study.

In [None]:
databasePath = "../../eICU/full/"
exportPath = "../../eICU/training/"

In [3]:
columns = ['patientunitstayid', 'labresultoffset', 'labname', 'labresult']
lab = pd.read_csv(databasePath + 'lab.csv', usecols=columns)
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
0,141168,2026,fibrinogen,177.00
1,141168,1133,PT - INR,2.50
2,141168,2026,magnesium,2.00
3,141168,1133,PT,26.60
4,141168,2141,pH,7.20
5,141168,231,PT - INR,1.70
6,141168,1701,urinary creatinine,173.12
7,141168,2026,MCH,29.20
8,141168,516,BUN,26.00
9,141168,1701,urinary sodium,12.00


#### Reading in patient ids to keep

In [4]:
patientIds = pd.read_csv('../../eICU' + '/training' + '/patientIds.csv')
patientIds = patientIds['patientunitstayid'].tolist()
print('Number of unique patients: ', len(patientIds))

#### Keeping patient ids from lab table that exist in patient id file

In [5]:
lab = lab.loc[lab['patientunitstayid'].isin(patientIds)]
lab

Unnamed: 0,patientunitstayid,labresultoffset,labname,labresult
815,141227,417,magnesium,2.00
816,141227,1467,lactate,1.20
817,141227,-13,-lymphs,15.00
818,141227,417,potassium,4.00
819,141227,-13,HCO3,11.00
820,141227,1362,BUN,32.00
821,141227,-13,MCHC,30.10
822,141227,-128,Hgb,8.40
823,141227,-13,lactate,4.30
824,141227,-1566,RDW,22.40


#### Updating patient ids with patient ids that remain from dataset

In [6]:
newPatientIds = pd.DataFrame(lab['patientunitstayid'].unique())
newPatientIds.rename(columns={0: 'patientunitstayid'}, inplace=True)
newPatientIds

Unnamed: 0,patientunitstayid
0,141227
1,141288
2,141289
3,141297
4,141304
5,141314
6,141362
7,141392
8,141432
9,141454


In [7]:
newPatientIds.to_csv(exportPath + "patientIds.csv", sep=',', index=False, encoding='utf-8')

In [8]:
del newPatientIds, patientIds

#### Keeping lab results relevant to diagnosing sepsis

In [9]:
# converting labname 'urinary creatinine to 'creatinine'
lab.loc[lab['labname'].str.contains("urinary creatinine"), 'labname'] = "creatinine"

# Only keeping rows where labname is 'WBC x 1000', 'lactate', 'creatinine'
labName = ['WBC x 1000', 'lactate', 'creatinine']
lab = lab[lab.labname.isin(labName)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


#### Transposing Lab Names and their results into columns.

In [10]:
# duplicating labs to transpose labresult and labname
labc = lab.copy()
labw = lab.copy()
labl = lab.copy()

labc = labc.loc[(labc['labname'].str.contains("creatinine"))]
labw = labw.loc[(labw['labname'].str.contains("WBC x 1000"))]
labl = labl.loc[(labl['labname'].str.contains("lactate"))]

# Transpose labresults and lab name to cols
labc['creatinine'] = labc.apply(lambda x: x['labresult'] if x['labname'] == 'creatinine' else -1, axis=1)
labw['wbcx1000'] = labw.apply(lambda x: x['labresult'] if x['labname'] == 'WBC x 1000' else -1, axis=1)
labl['lactate'] = labl.apply(lambda x: x['labresult'] if x['labname'] == 'lactate' else -1, axis=1)

labc = labc.drop(['labname', 'labresult'], axis = 1)
labw = labw.drop(['labname', 'labresult'], axis = 1)
labl = labl.drop(['labname', 'labresult'], axis = 1)

#### Merging Lab Dataframes into Final Lab Dataset

In [11]:
# merge lab results dataframes
finalLab = labc.merge(labw, how = 'left', on = ['patientunitstayid', 'labresultoffset'])
finalLab = finalLab.merge(labl, how = 'left', on = ['patientunitstayid', 'labresultoffset'])
finalLab = finalLab.drop_duplicates()
finalLab

Unnamed: 0,patientunitstayid,labresultoffset,creatinine,wbcx1000,lactate
0,141227,1362,1.90,42.7,
1,141227,-1566,1.40,48.2,
2,141227,-13,1.50,65.9,4.3
3,141227,937,1.60,,
4,141227,-128,1.40,47.7,
5,141288,3697,0.60,11.5,
6,141288,2202,0.66,16.0,
7,141288,6544,0.54,13.0,
8,141288,5097,0.71,11.9,
9,141288,797,0.73,16.3,1.1


#### Saving the Cleaned Patient DataFrame to a `csv` file

In [12]:
finalLab.to_csv(exportPath + "lab.csv", sep=',', index=False, encoding='utf-8')
