In [1]:
import pandas as pd
# import numpy as np

In [2]:
database_type = "/full"

The `patient` table includes general information about the patient admissions (for example, demographics, admission and discharge details). 
See: http://eicu-crd.mit.edu/eicutables/patient/

In [3]:
# loads patient table
patient = pd.read_csv('../eICU' + database_type + '/patient.csv', nrows=999999) 
patient = patient[['patientunitstayid', 'admissionweight']].set_index('patientunitstayid')
patient.head()

Unnamed: 0_level_0,admissionweight
patientunitstayid,Unnamed: 1_level_1
141168,84.3
141178,54.4
141179,
141194,73.9
141196,


The `vitalperiodic` table comprises data that is consistently interfaced from bedside vital signs monitors into eCareManager. 
Data are generally interfaced as 1 minute averages, and archived into the `vitalperiodic` table as 5 minute median values. 
For more detail, see: http://eicu-crd.mit.edu/eicutables/vitalPeriodic/

In [4]:
vitalperiodic = pd.read_csv('../eICU' + database_type + '/vitalPeriodic.csv', nrows=999999)
columns = ['patientunitstayid', 'observationoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic']
vitalperiodic = vitalperiodic[columns].set_index('observationoffset')
vitalperiodic = vitalperiodic.sort_values(by='observationoffset')
vitalperiodic.head()

Unnamed: 0_level_0,patientunitstayid,temperature,heartrate,respiration,systemicsystolic
observationoffset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-1445,148349,,60.0,20.0,
-1440,148349,,60.0,21.0,
-1435,150049,,96.0,19.0,
-1435,146418,,71.0,23.0,
-1435,148349,,60.0,21.0,


The `vitalAperiodic` table provides invasive vital sign data that is recorded at irregular intervals. 
See: http://eicu-crd.mit.edu/eicutables/vitalAperiodic/

In [5]:
vitalaperiodic = pd.read_csv('../eICU' + database_type + '/vitalAperiodic.csv', nrows=999999)
columns = ['observationoffset', 'patientunitstayid','noninvasivesystolic','cardiacoutput','cardiacinput']
vitalaperiodic = vitalaperiodic[columns].set_index('observationoffset')
vitalaperiodic = vitalaperiodic.sort_values(by='observationoffset')
vitalaperiodic.head()

Unnamed: 0_level_0,patientunitstayid,noninvasivesystolic,cardiacoutput,cardiacinput
observationoffset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-4320,218289,187.0,,
-4319,218289,178.0,,
-3883,218287,138.0,,
-3659,218287,134.0,,
-3632,218287,147.0,,


The `lab` table provides Laboratory tests that have have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

In [6]:
lab = pd.read_csv('../eICU' + database_type + '/lab.csv', nrows=999999)
columns = ['labresultoffset', 'patientunitstayid','labtypeid','labname', 'labresult', 'labmeasurenamesystem', 'labmeasurenameinterface']
lab = lab[columns].set_index('labresultoffset')
lab = lab.sort_values(by='labresultoffset')
lab.head()

Unnamed: 0_level_0,patientunitstayid,labtypeid,labname,labresult,labmeasurenamesystem,labmeasurenameinterface
labresultoffset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
-73277,154910,4,bedside glucose,77.0,mg/dL,mg/dL
-73231,154910,3,MCH,24.9,pg,pg
-73231,154910,3,-monos,2.0,%,%
-73231,154910,3,platelets x 1000,76.0,K/mcL,K/mcL
-73231,154910,3,WBC x 1000,2.9,K/mcL,K/mcL


In [7]:
# Pull out dataframe for labresults 'WBC x 1000', 'lactate', 'creatinine'


In [8]:
# converts all occurances of urinary creatinine in labname col to creatinine
# alternativy check labtypeid creatinine = 1,  urinary creatinine = 4

lab.loc[lab['labname'].str.contains("urinary creatinine"), 'labname'] = "creatinine"
lab.loc[(lab['labname'].str.contains("creatinine"))]

Unnamed: 0_level_0,patientunitstayid,labtypeid,labname,labresult,labmeasurenamesystem,labmeasurenameinterface
labresultoffset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
-72872,154910,1,creatinine,5.40,mg/dL,mg/dL
-71858,154910,1,creatinine,6.07,mg/dL,mg/dL
-70836,154910,1,creatinine,3.61,mg/dL,mg/dL
-69363,154910,1,creatinine,2.86,mg/dL,mg/dL
-67941,154910,1,creatinine,3.67,mg/dL,mg/dL
...,...,...,...,...,...,...
130238,157644,1,creatinine,3.30,mg/dL,mg/dL
131628,157644,1,creatinine,4.34,mg/dL,mg/dL
133088,157644,1,creatinine,3.10,mg/dL,mg/dL
137425,174525,1,creatinine,0.64,mg/dL,mg/dL


In [9]:
# showing all results for parient with id 154910

lab['patientunitstayid'] = lab['patientunitstayid'].astype(str)
lab.loc[(lab['patientunitstayid'].str.contains("154910"))]


Unnamed: 0_level_0,patientunitstayid,labtypeid,labname,labresult,labmeasurenamesystem,labmeasurenameinterface
labresultoffset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
-73277,154910,4,bedside glucose,77.0,mg/dL,mg/dL
-73231,154910,3,MCH,24.9,pg,pg
-73231,154910,3,-monos,2.0,%,%
-73231,154910,3,platelets x 1000,76.0,K/mcL,K/mcL
-73231,154910,3,WBC x 1000,2.9,K/mcL,K/mcL
...,...,...,...,...,...,...
7162,154910,4,bedside glucose,280.0,mg/dL,mg/dL
7290,154910,4,bedside glucose,215.0,mg/dL,mg/dL
7748,154910,4,bedside glucose,274.0,mg/dL,mg/dL
7887,154910,4,bedside glucose,318.0,mg/dL,mg/dL


The `intakeOutput` table provides Intake and output recorded for patients entered from the nursing flowsheet.

In [29]:
intakeOutput = pd.read_csv('../eICU' + database_type + '/intakeOutput.csv', nrows=999999)
columns = ['patientunitstayid', 'intakeoutputoffset', 'intaketotal', 'outputtotal', 'celllabel']
intakeOutput = intakeOutput[columns]
intakeOutput.sort_values(by='intakeoutputoffset')
intakeOutputUrine = intakeOutput.loc[(intakeOutput['celllabel'].str.contains("Urine"))]
intakeOutputUrine = pd.DataFrame.merge(patient, intakeOutputUrine, on='patientunitstayid')
intakeOutputUrine.head()

Unnamed: 0,patientunitstayid,admissionweight,intakeoutputoffset,intaketotal,outputtotal,celllabel
0,141179,,1420,0.0,700.0,Urine
1,141179,,933,0.0,500.0,Urine
2,141179,,1703,0.0,1000.0,Urine
3,141179,,678,0.0,700.0,Urine
4,141194,73.9,12201,290.4,100.0,Urine


In [31]:
intakeOutputUrine['admissionweight'].fillna(intakeOutputUrine['admissionweight'].mean(), inplace=True)
intakeOutputUrine['urineoutputbyweight'] = intakeOutputUrine.apply(lambda x: x['outputtotal'] / x['admissionweight'], axis=1)
intakeOutputUrine = intakeOutputUrine[['patientunitstayid', 'urineoutputbyweight']]
intakeOutputUrine

Unnamed: 0,patientunitstayid,urineoutputbyweight
0,141179,7.851544
1,141179,5.608246
2,141179,11.216491
3,141179,7.851544
4,141194,1.353180
...,...,...
207342,425009,4.559271
207343,425010,2.279635
207344,425072,3.727370
207345,425072,0.000000
