In [1]:
import pandas as pd

The `intakeOutput` table contains the intake and output values recorded for patients, as entered in the nursing flowsheet.
See: http://eicu-crd.mit.edu/eicutables/intakeOutput/

Urine output is expressed in mL/kg:
the recorded urine output in mL divided by the patient's admission weight in kg.

#### Reading in the intakeOutput dataset and keeping the columns relevant to our study.

In [2]:
# Folder the source intakeOutput.csv is read from.
databasePath = '../../eICU/full/'
# Folder holding patientIds.csv / patient.csv and receiving this notebook's CSV exports.
exportPath = '../../eICU/training/'

In [3]:
# Only these columns are parsed from the CSV; every other column is skipped.
columns = [
    'patientunitstayid',
    'intakeoutputoffset',
    'intaketotal',
    'outputtotal',
    'celllabel',
]
intakeOutput = pd.read_csv(f"{databasePath}/intakeOutput.csv", usecols=columns)
intakeOutput

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
0,205505,661,0.00,0.0,Bodyweight (lb)
1,205505,661,0.00,0.0,Bodyweight (kg)
2,150708,3523,727.00,0.0,P.O.
3,150708,3523,727.00,0.0,Volume (mL)-sodium chloride 0.9 % flush IVPB 5...
4,158159,24801,120.00,0.0,P.O.
...,...,...,...,...,...
12030284,3335854,1140,152.10,0.0,Crystalloids
12030285,3334548,766,340.00,350.0,Crystalloids
12030286,3330152,5102,300.00,0.0,Nutrition Total
12030287,3327194,3834,101.18,55.0,Crystalloids


#### Reading in patient ids to keep

In [4]:
# Load the current cohort file and keep just its stay ids as a plain Python list.
patientIds = pd.read_csv(f"{exportPath}/patientIds.csv")['patientunitstayid'].tolist()

#### Keeping only the intakeOutput rows whose patient id exists in the patient id file

In [5]:
# Boolean mask: True for rows whose stay id appears in the cohort id list.
inCohort = intakeOutput['patientunitstayid'].isin(patientIds)
intakeOutput = intakeOutput.loc[inCohort]
intakeOutput

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
0,205505,661,0.00,0.0,Bodyweight (lb)
1,205505,661,0.00,0.0,Bodyweight (kg)
2,150708,3523,727.00,0.0,P.O.
3,150708,3523,727.00,0.0,Volume (mL)-sodium chloride 0.9 % flush IVPB 5...
7,200733,7223,240.00,0.0,P.O.
...,...,...,...,...,...
12030284,3335854,1140,152.10,0.0,Crystalloids
12030285,3334548,766,340.00,350.0,Crystalloids
12030286,3330152,5102,300.00,0.0,Nutrition Total
12030287,3327194,3834,101.18,55.0,Crystalloids


#### Updating patient ids with patient ids that remain from dataset

In [6]:
# One row per distinct stay id still present in intakeOutput, in order of first appearance.
newPatientIds = pd.DataFrame(
    {'patientunitstayid': intakeOutput['patientunitstayid'].unique()}
)
newPatientIds

Unnamed: 0,patientunitstayid
0,205505
1,150708
2,200733
3,235783
4,178962
...,...
158084,3351044
158085,3342617
158086,3348452
158087,3331034


In [7]:
# Writes the reduced id list to patientIds.csv in the export folder (no index column).
newPatientIds.to_csv(f"{exportPath}patientIds.csv", index=False, sep=',', encoding='utf-8')

#### Reading in cleaned patient data

In [8]:
# Patient table from the export folder; it supplies `admissionweight` for the merge below.
patient = pd.read_csv(f"{exportPath}patient.csv")
patient

Unnamed: 0,patientunitstayid,admissionweight
0,141168,84.3
1,141203,70.2
2,141227,82.2
3,141229,89.8
4,141266,120.4
...,...,...
173104,3353235,90.0
173105,3353237,78.4
173106,3353251,102.0
173107,3353254,83.9


#### Keeping only the readings pertaining to urine output

In [9]:
# Keep only the rows whose label contains the literal text "Urine".
# na=False treats a missing celllabel as "no match"; without it the mask would
# contain NaN and .loc would refuse to index with it.
# regex=False makes the search a plain substring match rather than a regex.
isUrine = intakeOutput['celllabel'].str.contains("Urine", na=False, regex=False)
intakeOutputUrine = intakeOutput.loc[isUrine]
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,intaketotal,outputtotal,celllabel
10,178962,-1507,240.0,400.0,Urine
29,162156,129,100.0,0.0,Urine
34,202558,-891,0.0,100.0,Urine
47,237097,5850,240.0,250.0,Urine
53,192734,1523,636.0,150.0,Urine
...,...,...,...,...,...
12030268,3339291,3337,0.0,400.0,Urine
12030273,3326946,1501,0.0,400.0,Urine
12030278,3336512,1710,100.0,230.0,Urine
12030279,3347200,12668,0.0,750.0,Urine


#### Merging on patient table to get patient weight

In [10]:
# Inner join (the pandas default) on the stay id; patient columns come first in the result.
# NOTE(review): this reassigns intakeOutputUrine, so re-running the cell on its own
# would merge `admissionweight` in a second time - re-run the filtering cell first.
intakeOutputUrine = patient.merge(intakeOutputUrine, on='patientunitstayid')

#### Calculating patient urine output by dividing urine output in mL by the patient weight

In [11]:
# Urine output per kg of admission weight, computed column-wise instead of with a
# row-by-row apply (same arithmetic, far cheaper on millions of rows).
# A zero weight is masked to NaN first so the division cannot hit a divide-by-zero
# (which would otherwise surface as an error or an infinite value); the result for
# those rows is NaN.
# NOTE(review): `outputtotal` is used as the urine volume - confirm against the eICU
# intakeOutput documentation that this is the intended quantity for "Urine" rows.
admissionWeight = intakeOutputUrine['admissionweight']
intakeOutputUrine['urineoutputbyweight'] = (
    intakeOutputUrine['outputtotal'] / admissionWeight.where(admissionWeight != 0)
)

#### Dropping columns that are irrelevant to the study.

In [12]:
# Keep the stay id, the offset and the per-weight value, then drop exact duplicate rows.
keptColumns = ['patientunitstayid', 'intakeoutputoffset', 'urineoutputbyweight']
intakeOutputUrine = intakeOutputUrine.loc[:, keptColumns].drop_duplicates()
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,urineoutputbyweight
0,141227,-1893,2.433090
1,141227,-1663,2.433090
2,141227,-1773,2.433090
3,141227,-1351,2.433090
4,141229,3430,4.454343
...,...,...,...
3160133,3353254,4169,19.070322
3160134,3353254,5326,4.767580
3160135,3353254,2626,10.131108
3160136,3353254,3101,15.494636


#### Saving the intakeOutputUrine DataFrame to a `csv` file

In [13]:
# Writes the final urine-output table to the export folder (no index column).
intakeOutputUrine.to_csv(f"{exportPath}intakeOutputUrine.csv", index=False, sep=',', encoding='utf-8')