In [1]:
import pandas as pd

In [2]:
# Sub-directory of the eICU data root; it is spliced into the path that
# intakeOutput.csv is read from further down.
database_type = "/full"

The `intakeOutput` table holds the intake and output values recorded for patients, as entered from the nursing flowsheet.
See: http://eicu-crd.mit.edu/eicutables/intakeOutput/

Urine output is measured in mL/kg,
i.e. urine output in mL divided by patient weight in kg.

#### Reading in the intakeOutput dataset and keeping the columns relevant to our study.

In [3]:
# Load only the five fields this notebook works with from the raw intakeOutput table.
columns = [
    'patientunitstayid',
    'intakeoutputoffset',
    'intaketotal',
    'outputtotal',
    'celllabel',
]
intakeOutput = pd.read_csv(f'../../eICU{database_type}/intakeOutput.csv', usecols=columns)
intakeOutput

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
0,205505,661,0.00,0.0,Bodyweight (lb)
1,205505,661,0.00,0.0,Bodyweight (kg)
2,150708,3523,727.00,0.0,P.O.
3,150708,3523,727.00,0.0,Volume (mL)-sodium chloride 0.9 % flush IVPB 5...
4,158159,24801,120.00,0.0,P.O.
...,...,...,...,...,...
12030284,3335854,1140,152.10,0.0,Crystalloids
12030285,3334548,766,340.00,350.0,Crystalloids
12030286,3330152,5102,300.00,0.0,Nutrition Total
12030287,3327194,3834,101.18,55.0,Crystalloids


In [4]:
# Cohort of unit-stay ids, loaded from the training folder as a plain Python list.
patientIds = (
    pd.read_csv('../../eICU/training/patientIds.csv')['patientunitstayid']
    .tolist()
)

In [5]:
# Restrict the intake/output rows to unit stays present in the cohort list.
intakeOutput = intakeOutput.loc[
    lambda frame: frame['patientunitstayid'].isin(patientIds)
]
intakeOutput

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
2,150708,3523,727.00,0.0,P.O.
3,150708,3523,727.00,0.0,Volume (mL)-sodium chloride 0.9 % flush IVPB 5...
17,178592,4211,240.00,0.0,P.O.
18,153118,20506,0.00,300.0,Output (ml)-Nephrostomy Left
22,216156,13810,0.00,151.0,EVD Output (ml)-ICP/External Ventricular Drain...
...,...,...,...,...,...
12030277,3336512,1710,100.00,230.0,Crystalloids
12030278,3336512,1710,100.00,230.0,Urine
12030279,3347200,12668,0.00,750.0,Urine
12030287,3327194,3834,101.18,55.0,Crystalloids


In [6]:
# One row per distinct unit stay that still has intake/output rows,
# in order of first appearance.
newPatientIds = pd.DataFrame(
    {'patientunitstayid': intakeOutput['patientunitstayid'].unique()}
)
newPatientIds

Unnamed: 0,patientunitstayid
0,150708
1,178592
2,153118
3,216156
4,214103
...,...
40044,3327254
40045,3353038
40046,3337644
40047,3329996


In [7]:
# Persist the narrowed cohort. NOTE: this writes to the same patientIds.csv
# that the cohort list was loaded from earlier in this notebook, replacing it.
path = "../../eICU/training/"
newPatientIds.to_csv(
    f"{path}patientIds.csv",
    index=False,
    sep=',',
    encoding='utf-8',
)

#### Reading in cleaned patient data

In [8]:
# Load the cleaned patient table from the training folder; per the displayed
# output it carries patientunitstayid and admissionweight.
patient = pd.read_csv('../../eICU/training/patient.csv')
patient

Unnamed: 0,patientunitstayid,admissionweight
0,141227,82.2
1,141288,166.5
2,141289,166.5
3,141297,194.7
4,141304,194.7
...,...,...
43894,3353177,64.9
43895,3353184,46.7
43896,3353197,71.5
43897,3353199,71.5


#### Keeping only the readings pertaining to urine output

In [9]:
# Keep the rows whose cell label mentions "Urine".
# regex=False treats the pattern as a literal substring; na=False makes rows
# with a missing celllabel count as non-matches, so the mask stays strictly
# boolean instead of carrying NaN into the .loc indexer.
intakeOutputUrine = intakeOutput.loc[
    intakeOutput['celllabel'].str.contains("Urine", regex=False, na=False)
]
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
89,201456,-1472,120.0,300.0,Urine
105,236993,2714,0.0,100.0,Urine
116,158074,924,0.0,200.0,Urine
123,214017,1554,0.0,100.0,Urine
175,181491,769,0.0,280.0,Urine
...,...,...,...,...,...
12030240,3341064,34163,168.0,15.0,Urine
12030252,3331141,2841,0.0,90.0,Urine
12030260,3343420,13681,0.0,225.0,Urine
12030278,3336512,1710,100.0,230.0,Urine


#### Merging with the patient table to get patient weight

In [10]:
# Inner join (the pandas default) of the patient table with the urine rows on
# the unit-stay id, so each urine reading gains that stay's admissionweight.
intakeOutputUrine = patient.merge(
    intakeOutputUrine,
    how='inner',
    on='patientunitstayid',
)

#### Calculating patient urine output by dividing urine output in mL by the patient weight

In [11]:
# Output total divided by admission weight, computed column-wise instead of
# through a per-row Python lambda (the merged frame has on the order of a
# million rows, so the row-wise apply is needlessly slow).
# With float columns a zero weight yields inf/NaN here rather than raising.
intakeOutputUrine['urineoutputbyweight'] = (
    intakeOutputUrine['outputtotal'] / intakeOutputUrine['admissionweight']
)

#### Dropping columns irrelevant to the study.

In [12]:
# Reduce to the three output fields and collapse exact duplicate rows.
intakeOutputUrine = (
    intakeOutputUrine
    .loc[:, ['patientunitstayid', 'intakeoutputoffset', 'urineoutputbyweight']]
    .drop_duplicates()
)
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,urineoutputbyweight
0,141227,-1893,2.433090
1,141227,-1663,2.433090
2,141227,-1773,2.433090
3,141227,-1351,2.433090
4,141288,2737,6.006006
...,...,...,...
941330,3353251,13543,1.078431
941331,3353251,12163,0.735294
941332,3353251,4363,3.431373
941333,3353251,3223,0.343137


#### Saving the intakeOutputUrine DataFrame to a `csv` file

In [13]:
# Write the urine-output-by-weight table into the training folder as CSV,
# without the DataFrame index.
path = "../../eICU/training/"
intakeOutputUrine.to_csv(
    f"{path}intakeOutputUrine.csv",
    index=False,
    sep=',',
    encoding='utf-8',
)