In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
# import numpy as np

In [3]:
# Sub-directory (written with its leading slash) that is spliced between
# '../eICU' and each CSV file name when the tables are read below.
database_type = "/training"

The `patient` table includes general information about the patient admissions (for example, demographics, admission and discharge details). 
See: http://eicu-crd.mit.edu/eicutables/patient/

In [4]:
# Load the patient table, capped at 999,999 rows, and reduce it to the
# admission weight indexed by the unit-stay identifier.
patient_csv = f'../eICU{database_type}/patient.csv'
patient = (
    pd.read_csv(patient_csv, nrows=999999)
    .set_index('patientunitstayid')[['admissionweight']]
)
patient

Unnamed: 0_level_0,admissionweight
patientunitstayid,Unnamed: 1_level_1
141168,84.3
141178,54.4
141179,176.4
141194,73.9
141196,194.7
...,...
3353235,90.0
3353237,78.4
3353251,102.0
3353254,83.9


The `vitalperiodic` table comprises data that is consistently interfaced from bedside vital signs monitors into eCareManager. 
Data are generally interfaced as 1 minute averages, and archived into the `vitalperiodic` table as 5 minute median values. 
For more detail, see: http://eicu-crd.mit.edu/eicutables/vitalPeriodic/

In [5]:
# Load the periodic vitals (capped at 999,999 rows), keep the stay id, the
# observation offset and four vital-sign columns, then order rows by offset.
columns = ['patientunitstayid', 'observationoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic']
vitalperiodic = (
    pd.read_csv(f'../eICU{database_type}/vitalPeriodic.csv', nrows=999999)
    .loc[:, columns]
    .sort_values(by='observationoffset')
)
vitalperiodic

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic
653065,148349,-1445,,60.0,20.0,
653071,148349,-1440,,60.0,21.0,
790455,150049,-1435,,96.0,19.0,
479374,146418,-1435,,71.0,23.0,
653145,148349,-1435,,60.0,21.0,
...,...,...,...,...,...,...
273524,144297,71778,,72.0,33.0,
273140,144297,71783,,72.0,27.0,
273956,144297,71788,,72.0,34.0,
275114,144297,71793,,72.0,33.0,


The `lab` table provides laboratory tests that have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

In [6]:
# Load the lab table from the selected eICU sub-directory (at most 999,999 rows).
lab_csv = f'../eICU{database_type}/lab.csv'
finallab = pd.read_csv(lab_csv, nrows=999999)

In [7]:
# Load the urine intake/output table from the selected eICU sub-directory
# (at most 999,999 rows).
urine_csv = f'../eICU{database_type}/intakeOutputUrine.csv'
intakeOutputUrine = pd.read_csv(urine_csv, nrows=999999)

In [8]:
# Display the loaded urine-output table as a sanity check.
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,urineoutputbyweight
0,141179,1420,3.968254
1,141179,933,2.834467
2,141179,1703,5.668934
3,141179,678,3.968254
4,141194,12201,1.353180
...,...,...,...
999994,1105948,150,1.557632
999995,1105948,1230,0.778816
999996,1105948,1170,2.336449
999997,1105948,510,3.894081


In [9]:
# Merging Final Training Table
# Outer-join vitals with labs, then with urine output. Each join pairs rows on
# the stay id plus that table's own offset column, and `how='outer'` keeps rows
# that exist on only one side.
stay_key = 'patientunitstayid'
merge1 = vitalperiodic.merge(
    finallab,
    how='outer',
    left_on=[stay_key, 'observationoffset'],
    right_on=[stay_key, 'labresultoffset'],
)
# NOTE(review): rows that come only from `finallab` carry NaN in
# `observationoffset` at this point, so this second join cannot pair them with
# urine rows recorded at the same offset -- confirm whether the offsets should
# be coalesced before merging.
finalMerge = merge1.merge(
    intakeOutputUrine,
    how='outer',
    left_on=[stay_key, 'observationoffset'],
    right_on=[stay_key, 'intakeoutputoffset'],
)

# Keep the key, the three offset columns and the measurements used downstream,
# ordered by stay and then by observation offset.
kept_columns = [
    'patientunitstayid', 'observationoffset', 'labresultoffset', 'intakeoutputoffset',
    'temperature', 'heartrate', 'respiration', 'systemicsystolic',
    'creatinine', 'wbcx1000', 'lactate', 'urineoutputbyweight',
]
finalMerge = finalMerge[kept_columns].sort_values(by=[stay_key, 'observationoffset'])
finalMerge


Unnamed: 0,patientunitstayid,observationoffset,labresultoffset,intakeoutputoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight
34160,141168,119.0,,,,140.0,,,,,,
35433,141168,124.0,,,,140.0,,,,,,
37167,141168,129.0,,,,140.0,,,,,,
38458,141168,134.0,,,,140.0,,,,,,
40027,141168,139.0,,,,140.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1999231,2848042,,7069.0,,,,,,0.83,,,
1999232,2848042,,8294.0,,,,,,0.72,,,
1999233,2848042,,5564.0,,,,,,0.61,,,
1999234,2848042,,2040.0,,,,,,0.69,8.4,,


In [10]:
# Coalesce the three offset columns into `observationoffset`: keep the vitals
# offset when present, otherwise fall back to the lab offset, and finally to
# the urine offset. Chained `fillna` gives the same result as the previous
# row-wise `apply`, but works column-wise instead of calling a Python lambda
# once per row.
finalMerge['observationoffset'] = (
    finalMerge['observationoffset']
    .fillna(finalMerge['labresultoffset'])
    .fillna(finalMerge['intakeoutputoffset'])
)
finalMerge

Unnamed: 0,patientunitstayid,observationoffset,labresultoffset,intakeoutputoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight
34160,141168,119.0,,,,140.0,,,,,,
35433,141168,124.0,,,,140.0,,,,,,
37167,141168,129.0,,,,140.0,,,,,,
38458,141168,134.0,,,,140.0,,,,,,
40027,141168,139.0,,,,140.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1999231,2848042,7069.0,7069.0,,,,,,0.83,,,
1999232,2848042,8294.0,8294.0,,,,,,0.72,,,
1999233,2848042,5564.0,5564.0,,,,,,0.61,,,
1999234,2848042,2040.0,2040.0,,,,,,0.69,8.4,,


In [11]:
# The lab and urine offsets have been folded into `observationoffset`, so the
# two helper columns are removed here.
redundant_offsets = ['labresultoffset', 'intakeoutputoffset']
finalMerge = finalMerge.drop(columns=redundant_offsets)
finalMerge

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight
34160,141168,119.0,,140.0,,,,,,
35433,141168,124.0,,140.0,,,,,,
37167,141168,129.0,,140.0,,,,,,
38458,141168,134.0,,140.0,,,,,,
40027,141168,139.0,,140.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...
1999231,2848042,7069.0,,,,,0.83,,,
1999232,2848042,8294.0,,,,,0.72,,,
1999233,2848042,5564.0,,,,,0.61,,,
1999234,2848042,2040.0,,,,,0.69,8.4,,


In [12]:
# Load the diagnosis table from the selected eICU sub-directory
# (at most 999,999 rows).
diagnosis_csv = f'../eICU{database_type}/diagnosis.csv'
diagnosis = pd.read_csv(diagnosis_csv, nrows=999999)

In [13]:
# Cast only the `diagnosis` column to the builtin int type; other columns keep
# their dtypes.
# NOTE(review): the recorded `dtypes` output shows int32 for this column, so
# the width of `int` appears to be platform-dependent -- confirm whether a
# fixed width is wanted.
diagnosis = diagnosis.astype({'diagnosis': int})

In [14]:
# Show the column dtypes to verify the cast above.
diagnosis.dtypes

patientunitstayid    int64
diagnosisoffset      int64
diagnosis            int32
dtype: object

In [15]:
# Outer-join the merged measurements with the diagnosis rows on the stay id.
# NOTE(review): the stay id repeats on both sides, so every measurement row is
# paired with every diagnosis row of the same stay (many-to-many); the recorded
# output has roughly 70 million rows with repeated measurements -- confirm this
# is intended rather than a time-aware or de-duplicated join.
merge2 = finalMerge.merge(diagnosis, how='outer', on='patientunitstayid')

In [16]:
# Display the joined table to inspect the result of the merge.
merge2

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight,diagnosisoffset,diagnosis
0,141168,119.0,,140.0,,,,,,,72.0,0.0
1,141168,119.0,,140.0,,,,,,,118.0,0.0
2,141168,119.0,,140.0,,,,,,,72.0,0.0
3,141168,119.0,,140.0,,,,,,,118.0,0.0
4,141168,119.0,,140.0,,,,,,,118.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
69927295,1360545,,,,,,,,,,873.0,0.0
69927296,1360545,,,,,,,,,,873.0,0.0
69927297,1360545,,,,,,,,,,68.0,0.0
69927298,1360545,,,,,,,,,,68.0,0.0


In [17]:
# Sorted list of the distinct stay ids present in `merge2`.
# `Series.unique()` de-duplicates inside pandas, so the full column is not
# boxed into one Python object per row first; `.tolist()` keeps the elements
# as plain Python ints, as before.
idList = sorted(merge2['patientunitstayid'].unique().tolist())

In [None]:
# Work on a copy so later gap filling leaves `merge2` untouched.
ff = merge2.copy()
# Placeholder for the gap-filled result produced by the next cell.
nfinalMerge = pd.DataFrame()

In [None]:
def _interpolate_stay(stay_df, both_ways_columns):
    """Return a gap-filled copy of the rows belonging to one unit stay.

    Parameters
    ----------
    stay_df : pandas.DataFrame
        Rows of a single `patientunitstayid`, in the order they should be
        interpolated.
    both_ways_columns : list of str
        Columns that are linearly interpolated with `limit_direction='both'`,
        so leading and trailing gaps are filled as well.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as `stay_df`; the input is
        not modified.
    """
    # Copy first: writing into a filtered slice of the parent frame is what
    # raises SettingWithCopyWarning.
    filled = stay_df.copy()
    for column in both_ways_columns:
        filled[column] = filled[column].interpolate(method='linear', limit_direction='both')
    # `diagnosis` is interpolated with the default direction only; whatever is
    # still missing becomes numeric 0 (not the string '0'), which keeps the
    # column numeric.
    # NOTE(review): linear interpolation can create fractional diagnosis
    # values between differing neighbours -- confirm this is intended.
    filled['diagnosis'] = filled['diagnosis'].interpolate(method='linear').fillna(0)
    return filled


measurement_columns = [
    'temperature', 'heartrate', 'respiration', 'systemicsystolic',
    'creatinine', 'wbcx1000', 'lactate', 'urineoutputbyweight',
]

# `groupby(..., sort=True)` visits the stays in ascending id order and keeps
# the row order inside each stay, which matches looping over the sorted id
# list without rebuilding a boolean mask for every stay.
stay_frames = [
    _interpolate_stay(stay_df, measurement_columns)
    for _, stay_df in ff.groupby('patientunitstayid', sort=True)
]

# Concatenate once at the end: growing a frame inside the loop is quadratic,
# and `DataFrame.append` no longer exists in pandas 2.x. `pd.concat` rejects
# an empty list, hence the guard.
nfinalMerge = pd.concat(stay_frames) if stay_frames else ff.iloc[0:0].copy()

In [None]:
# Tally how often each diagnosis value occurs in the gap-filled table.
nfinalMerge['diagnosis'].value_counts()

In [None]:
# Write the result next to the input CSVs. The directory is built from the same
# '../eICU' root and `database_type` used by every read in this notebook,
# instead of a separate hard-coded '../../eICU/training/' location.
path = '../eICU' + database_type + '/'
nfinalMerge.to_csv(path + "finalData.csv", sep=',', index=False, encoding='utf-8')