In [1]:
import pandas as pd

In [2]:
# Sub-directory of the eICU data root that the raw intakeOutput table is read from.
database_type = '/full'

The `intakeOutput` table contains the intake and output records for patients, as entered from the nursing flowsheet.
See: http://eicu-crd.mit.edu/eicutables/intakeOutput/

Urine output is normalised by body weight and reported in mL/kg:
urine output (mL) / patient weight (kg).

#### Reading in the intakeOutput dataset and keeping the columns relevant to our study.

In [3]:
# Load only the columns this notebook works with from the raw intakeOutput table.
columns = [
    'patientunitstayid',
    'intakeoutputoffset',
    'intaketotal',
    'outputtotal',
    'celllabel',
]
intake_output_csv = '../../eICU' + database_type + '/intakeOutput.csv'
intakeOutput = pd.read_csv(intake_output_csv, usecols=columns)
intakeOutput

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
0,205505,661,0.00,0.0,Bodyweight (lb)
1,205505,661,0.00,0.0,Bodyweight (kg)
2,150708,3523,727.00,0.0,P.O.
3,150708,3523,727.00,0.0,Volume (mL)-sodium chloride 0.9 % flush IVPB 5...
4,158159,24801,120.00,0.0,P.O.
5,174890,-11303,0.00,0.0,Bodyweight (lb)
6,174890,-11303,0.00,0.0,Bodyweight (kg)
7,200733,7223,240.00,0.0,P.O.
8,235783,9127,978.00,0.0,Volume (mL)-dextrose 5 % / sodium chloride 0.4...
9,178962,-1507,240.00,400.0,P.O.


In [4]:
# Read the cohort file from the training directory and keep the
# `patientunitstayid` column as a plain Python list.
patient_ids_frame = pd.read_csv('../../eICU' + '/training' + '/patientIds.csv')
patientIds = patient_ids_frame['patientunitstayid'].tolist()

In [5]:
# Keep only the intake/output rows whose unit stay is in the cohort list.
in_cohort = intakeOutput['patientunitstayid'].isin(patientIds)
intakeOutput = intakeOutput[in_cohort]
intakeOutput

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
2,150708,3523,727.00,0.0,P.O.
3,150708,3523,727.00,0.0,Volume (mL)-sodium chloride 0.9 % flush IVPB 5...
17,178592,4211,240.00,0.0,P.O.
18,153118,20506,0.00,300.0,Output (ml)-Nephrostomy Left
22,216156,13810,0.00,151.0,EVD Output (ml)-ICP/External Ventricular Drain...
23,216156,13810,0.00,151.0,EVD Output (ml)-ICP/External Ventricular Drain...
25,214103,-167,0.00,0.0,Bodyweight (lb)
26,214103,-167,0.00,0.0,Bodyweight (kg)
27,232385,1566,75.00,0.0,Volume-Transfuse cryoprecipitate
37,188603,44371,360.00,0.0,P.O.


In [6]:
# One row per distinct unit stay that still has intake/output records after
# the cohort filter; the column is named directly instead of renamed afterwards.
newPatientIds = pd.DataFrame(
    {'patientunitstayid': intakeOutput['patientunitstayid'].unique()}
)
newPatientIds

Unnamed: 0,patientunitstayid
0,150708
1,178592
2,153118
3,216156
4,214103
5,232385
6,188603
7,209311
8,226225
9,229128


In [7]:
# Persist the narrowed cohort.
# NOTE: this writes to ../../eICU/training/patientIds.csv, the same file the
# cohort ids were loaded from earlier in this notebook, so a re-run starts
# from the already-narrowed list.
path = "../../eICU/training/"
patient_ids_csv = path + "patientIds.csv"
newPatientIds.to_csv(patient_ids_csv, sep=',', index=False, encoding='utf-8')

#### Reading in cleaned patient data

In [8]:
# Patient table from the training directory; the merge and weight division
# further down rely on its `patientunitstayid` and `admissionweight` columns.
patient_csv = '../../eICU/training/patient.csv'
patient = pd.read_csv(patient_csv)
patient

Unnamed: 0,patientunitstayid,admissionweight
0,141227,82.2
1,141288,166.5
2,141289,166.5
3,141297,194.7
4,141304,194.7
5,141314,200.9
6,141362,194.7
7,141392,166.5
8,141432,131.5
9,141454,82.2


#### Keeping only the readings pertaining to urine output

In [9]:
# Keep only the rows whose label contains the (case-sensitive) text "Urine".
# na=False treats a missing `celllabel` as "no match"; without it the mask
# would contain NaN and boolean indexing would raise.
# regex=False makes it explicit that "Urine" is a plain substring, not a pattern.
is_urine = intakeOutput['celllabel'].str.contains("Urine", na=False, regex=False)
intakeOutputUrine = intakeOutput.loc[is_urine]
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
89,201456,-1472,120.00,300.0,Urine
103,187412,4435,360.00,600.0,Urine
116,158074,924,0.00,200.0,Urine
123,214017,1554,0.00,100.0,Urine
161,177611,60546,0.00,250.0,Urine
175,181491,769,0.00,280.0,Urine
242,232626,17854,0.00,800.0,Urine
300,224705,-5908,0.00,75.0,Urine
374,145272,13514,0.00,250.0,Urine
395,193274,4005,950.00,500.0,Urine


#### Merging with the patient table to get each patient's weight

In [10]:
# Attach the patient table's columns (including admissionweight) to every
# urine row. With pandas' default inner join, stays missing from either
# table are dropped.
intakeOutputUrine = patient.merge(intakeOutputUrine, on='patientunitstayid')

#### Calculating patient urine output by dividing urine output in mL by the patient weight

In [11]:
# Weight-normalised urine output: outputtotal divided by admissionweight
# (mL / kg according to the notebook's description).
# Vectorised column division replaces the row-wise apply: same values, far
# faster, and it also works on an empty frame. A zero admissionweight yields
# inf/NaN here rather than an exception.
# NOTE(review): `outputtotal` is used as the urine volume -- confirm it is not
# the combined output across all labels recorded at this offset.
intakeOutputUrine['urineoutputbyweight'] = (
    intakeOutputUrine['outputtotal'] / intakeOutputUrine['admissionweight']
)

#### Dropping columns irrelevant to the study and removing duplicate rows.

In [12]:
# Reduce to the stay id, the time offset and the derived measurement, then
# collapse rows that became identical once the other columns were removed.
keep_columns = ['patientunitstayid', 'intakeoutputoffset', 'urineoutputbyweight']
intakeOutputUrine = intakeOutputUrine.loc[:, keep_columns].drop_duplicates()
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,urineoutputbyweight
0,141227,-1893,2.433090
1,141227,-1663,2.433090
2,141227,-1773,2.433090
3,141227,-1351,2.433090
4,141288,2737,6.006006
5,141289,835,1.171171
6,141289,1255,1.051051
7,141289,1015,0.360360
8,141304,10842,1.540832
9,141304,10157,2.439651


#### Saving the intakeOutputUrine DataFrame to a `csv` file

In [13]:
# Write the per-stay, per-offset urine output table to the training directory
# (no index column, comma-separated, UTF-8).
path = "../../eICU/training/"
urine_csv = path + "intakeOutputUrine.csv"
intakeOutputUrine.to_csv(urine_csv, sep=',', index=False, encoding='utf-8')