In [9]:
import pandas as pd
# import numpy as np

In [10]:
database_type = "/full"

The `patient` table includes general information about the patient admissions (for example, demographics, admission and discharge details). 
See: http://eicu-crd.mit.edu/eicutables/patient/

In [11]:
# loads patient table
patient = pd.read_csv('../eICU' + database_type + '/patient.csv', nrows=999999) 
patient = patient[['patientunitstayid', 'admissionweight']].set_index('patientunitstayid')
patient

Unnamed: 0_level_0,admissionweight
patientunitstayid,Unnamed: 1_level_1
141168,84.3
141178,54.4
141179,
141194,73.9
141196,
...,...
3353235,90.0
3353237,78.4
3353251,102.0
3353254,83.9


The `vitalperiodic` table comprises data that is consistently interfaced from bedside vital signs monitors into eCareManager. 
Data are generally interfaced as 1 minute averages, and archived into the `vitalperiodic` table as 5 minute median values. 
For more detail, see: http://eicu-crd.mit.edu/eicutables/vitalPeriodic/

In [12]:
vitalperiodic = pd.read_csv('../eICU' + database_type + '/vitalPeriodic.csv', nrows=999999)
columns = ['patientunitstayid', 'observationoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic']
vitalperiodic = vitalperiodic[columns]
vitalperiodic = vitalperiodic.sort_values(by='observationoffset')
vitalperiodic

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic
653065,148349,-1445,,60.0,20.0,
653071,148349,-1440,,60.0,21.0,
790455,150049,-1435,,96.0,19.0,
479374,146418,-1435,,71.0,23.0,
653145,148349,-1435,,60.0,21.0,
...,...,...,...,...,...,...
273524,144297,71778,,72.0,33.0,
273140,144297,71783,,72.0,27.0,
273956,144297,71788,,72.0,34.0,
275114,144297,71793,,72.0,33.0,


The `lab` table provides Laboratory tests that have have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

In [13]:
lab = pd.read_csv('../eICU' + database_type + '/lab.csv', nrows=999999)
columns = ['labresultoffset', 'patientunitstayid','labtypeid','labname', 'labresult', 'labmeasurenamesystem', 'labmeasurenameinterface']
lab = lab[columns]
lab = lab.sort_values(by='labresultoffset')
lab

Unnamed: 0,labresultoffset,patientunitstayid,labtypeid,labname,labresult,labmeasurenamesystem,labmeasurenameinterface
337586,-73277,154910,4,bedside glucose,77.00,mg/dL,mg/dL
337274,-73231,154910,3,MCH,24.90,pg,pg
337258,-73231,154910,3,-monos,2.00,%,%
337290,-73231,154910,3,platelets x 1000,76.00,K/mcL,K/mcL
337298,-73231,154910,3,WBC x 1000,2.90,K/mcL,K/mcL
...,...,...,...,...,...,...,...
861141,144155,174525,3,-basos,0.00,%,%
860866,144155,174525,1,BUN,12.00,mg/dL,mg/dL
861160,144155,174525,3,RBC,3.87,M/mcL,mil/mcL
860818,144155,174525,1,glucose,105.00,mg/dL,mg/dL


In [14]:
#converting labname 'urinary creatinine to 'creatinine'
lab.loc[lab['labname'].str.contains("urinary creatinine"), 'labname'] = "creatinine"
lab.loc[(lab['labname'].str.contains("creatinine"))]

# Pull out dataframe for labresults 'WBC x 1000', 'lactate', 'creatinine'
lab_name = ['WBC x 1000', 'lactate', 'creatinine']
lab = lab[lab.labname.isin(lab_name)]
lab['creatinine'] = lab.apply(lambda x: x['labresult'] if x['labname'] == 'creatinine' else -1, axis=1)
lab['wbcx1000'] = lab.apply(lambda x: x['labresult'] if x['labname'] == 'WBC x 1000' else -1, axis=1)
lab['lactate'] = lab.apply(lambda x: x['labresult'] if x['labname'] == 'lactate' else -1, axis=1)

lab = lab.drop(['labtypeid', 'labmeasurenamesystem', 'labmeasurenameinterface', 'labname', 'labresult'], axis = 1)
lab

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,labresultoffset,patientunitstayid,creatinine,wbcx1000,lactate
337298,-73231,154910,-1.00,2.9,-1.0
337133,-72872,154910,-1.00,5.4,-1.0
337673,-72872,154910,5.40,-1.0,-1.0
337289,-71858,154910,-1.00,8.3,-1.0
337051,-71858,154910,6.07,-1.0,-1.0
...,...,...,...,...,...
418876,133088,157644,-1.00,6.1,-1.0
860135,137425,174525,0.64,-1.0,-1.0
860751,137425,174525,-1.00,11.1,-1.0
860898,144155,174525,0.65,-1.0,-1.0


The `intakeOutput` table provides Intake and output recorded for patients entered from the nursing flowsheet.
See: http://eicu-crd.mit.edu/eicutables/intakeOutput/

In [15]:
intakeOutput = pd.read_csv('../eICU' + database_type + '/intakeOutput.csv', nrows=999999)
columns = ['patientunitstayid', 'intakeoutputoffset', 'intaketotal', 'outputtotal', 'celllabel']
intakeOutput = intakeOutput[columns]
intakeOutput.sort_values(by='intakeoutputoffset')
intakeOutputUrine = intakeOutput.loc[(intakeOutput['celllabel'].str.contains("Urine"))]
intakeOutputUrine = pd.DataFrame.merge(patient, intakeOutputUrine, on='patientunitstayid')
intakeOutputUrine

Unnamed: 0,patientunitstayid,admissionweight,intakeoutputoffset,intaketotal,outputtotal,celllabel
0,141179,,1420,0.0,700.0,Urine
1,141179,,933,0.0,500.0,Urine
2,141179,,1703,0.0,1000.0,Urine
3,141179,,678,0.0,700.0,Urine
4,141194,73.9,12201,290.4,100.0,Urine


In [16]:
intakeOutputUrine['admissionweight'].fillna(intakeOutputUrine['admissionweight'].mean(), inplace=True)
intakeOutputUrine['urineoutputbyweight'] = intakeOutputUrine.apply(lambda x: x['outputtotal'] / x['admissionweight'], axis=1)
intakeOutputUrine = intakeOutputUrine[['patientunitstayid','intakeoutputoffset', 'urineoutputbyweight']]
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,urineoutputbyweight
0,141179,1420,7.851544
1,141179,933,5.608246
2,141179,1703,11.216491
3,141179,678,7.851544
4,141194,12201,1.353180
...,...,...,...
207342,425009,11046,4.559271
207343,425010,1358,2.279635
207344,425072,28,3.727370
207345,425072,1108,0.000000


In [None]:
# Merging Final Training Table

