In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
# import numpy as np

In [3]:
# Dataset split sub-directory (note the leading slash). It is concatenated into
# the '../eICU' + database_type + '/<table>.csv' paths used by the loading cells.
database_type = "/training"

The `patient` table includes general information about the patient admissions (for example, demographics, admission and discharge details). 
See: http://eicu-crd.mit.edu/eicutables/patient/

In [4]:
# Read the patient table for the selected split, keep only the stay id and the
# admission weight, and index the result by stay id.
patient_csv = '../eICU' + database_type + '/patient.csv'
patient_columns = ['patientunitstayid', 'admissionweight']
patient = (
    pd.read_csv(patient_csv)
    .loc[:, patient_columns]
    .set_index('patientunitstayid')
)
patient

Unnamed: 0_level_0,admissionweight
patientunitstayid,Unnamed: 1_level_1
141168,84.3
141178,54.4
141179,176.4
141194,73.9
141196,194.7
...,...
3353235,90.0
3353237,78.4
3353251,102.0
3353254,83.9


The `vitalperiodic` table comprises data that is consistently interfaced from bedside vital signs monitors into eCareManager. 
Data are generally interfaced as 1 minute averages, and archived into the `vitalperiodic` table as 5 minute median values. 
For more detail, see: http://eicu-crd.mit.edu/eicutables/vitalPeriodic/

In [5]:
# Load the periodic vital signs for the selected split.
#
# Only the columns used in this notebook are parsed (`usecols`). The recorded
# output of this cell shows row labels above 100 million, so reading every
# column of the CSV just to discard most of them costs a lot of memory and time.
# Side effect of the change: a column missing from the CSV is now reported by
# read_csv (ValueError) instead of by the column selection (KeyError).
columns = ['patientunitstayid', 'observationoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic']
vitalperiodic = pd.read_csv('../eICU' + database_type + '/vitalPeriodic.csv', usecols=columns)
# `usecols` returns the columns in file order; re-select to pin the order listed above.
vitalperiodic = vitalperiodic[columns]
# Order the rows by their offset value across all patients.
vitalperiodic = vitalperiodic.sort_values(by='observationoffset')
vitalperiodic

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic
59725767,1446964,-49781,,101.0,,
59725709,1446964,-49776,,99.0,,
59725725,1446964,-49771,,97.0,,
59725755,1446964,-49766,,97.0,,
59725682,1446964,-49761,,99.0,,
...,...,...,...,...,...,...
109037567,2747640,766035,,81.0,15.0,
109037703,2747640,766040,,80.0,15.0,
109037799,2747640,766045,,80.0,15.0,
109037575,2747640,766050,,78.0,15.0,


The `lab` table provides laboratory tests that have been mapped to a standard set of measurements.
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

In [6]:
# Read the complete lab table for the selected split.
lab_csv = '../eICU' + database_type + '/lab.csv'
finallab = pd.read_csv(lab_csv)

In [7]:
# Read the urine intake/output CSV for the selected split.
intake_output_csv = '../eICU' + database_type + '/intakeOutputUrine.csv'
intakeOutputUrine = pd.read_csv(intake_output_csv)

In [8]:
# Preview the frame loaded from intakeOutputUrine.csv.
# NOTE(review): the output saved with this cell lists only `patientunitstayid`
# and `admissionweight`, while the merge cell further down joins on
# `intakeoutputoffset` and selects `urineoutputbyweight` (its recorded run ends
# in KeyError: 'intakeoutputoffset'). The CSV on disk appears to hold the wrong
# table -- confirm how intakeOutputUrine.csv was generated.
intakeOutputUrine

Unnamed: 0,patientunitstayid,admissionweight
0,141168,84.3
1,141178,54.4
2,141179,176.4
3,141194,73.9
4,141196,194.7
...,...,...
200854,3353235,90.0
200855,3353237,78.4
200856,3353251,102.0
200857,3353254,83.9


In [9]:
# Merging Final Training Table
#
# Outer-joins the vital signs with the lab results, then with the urine output
# records. Rows are matched on the patient stay id plus each source's own
# offset column (observationoffset / labresultoffset / intakeoutputoffset).

# Fail fast: check the join keys before running the merges, so a wrong or stale
# CSV is reported by name immediately instead of surfacing as a bare KeyError
# from inside pd.merge after the first (large) outer join has already run.
merge_inputs = [
    ('vitalperiodic', vitalperiodic, ['patientunitstayid', 'observationoffset']),
    ('finallab', finallab, ['patientunitstayid', 'labresultoffset']),
    ('intakeOutputUrine', intakeOutputUrine, ['patientunitstayid', 'intakeoutputoffset']),
]
for frame_name, frame, key_columns in merge_inputs:
    absent_keys = [key for key in key_columns if key not in frame.columns]
    if absent_keys:
        raise KeyError(
            '{} is missing merge key column(s) {}; available columns: {}'.format(
                frame_name, absent_keys, list(frame.columns)
            )
        )

# Columns kept in the final table; each must be supplied by one of the inputs.
final_columns = ['patientunitstayid', 'observationoffset', 'labresultoffset', 'intakeoutputoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic', 'creatinine', 'wbcx1000', 'lactate', 'urineoutputbyweight']
available_columns = set(vitalperiodic.columns) | set(finallab.columns) | set(intakeOutputUrine.columns)
absent_columns = [name for name in final_columns if name not in available_columns]
if absent_columns:
    raise KeyError(
        'none of the merge inputs provides column(s) {}'.format(absent_columns)
    )

merge1 = pd.merge(vitalperiodic, finallab,  how='outer', left_on=['patientunitstayid','observationoffset'], right_on = ['patientunitstayid','labresultoffset'])
finalMerge = pd.merge(merge1, intakeOutputUrine,  how='outer', left_on=['patientunitstayid','observationoffset'], right_on = ['patientunitstayid','intakeoutputoffset'])

finalMerge = finalMerge[final_columns]
# Rows without an observationoffset (NaN) are placed last within each patient.
finalMerge = finalMerge.sort_values(by=['patientunitstayid', 'observationoffset'])
finalMerge


KeyError: 'intakeoutputoffset'

In [None]:
# Give every row a single timeline value: keep observationoffset where present,
# otherwise fall back to labresultoffset, and finally to intakeoutputoffset.
# Chained column-wise fillna selects the same first non-null value as a
# row-wise apply would, without calling a Python lambda once per row.
finalMerge['observationoffset'] = (
    finalMerge['observationoffset']
    .fillna(finalMerge['labresultoffset'])
    .fillna(finalMerge['intakeoutputoffset'])
)
# Rows that had no observationoffset were placed last within their patient when
# the frame was sorted (NaN sorts last). Now that the offset is filled, re-sort
# so each patient's rows are in offset order; the linear interpolation applied
# later depends on row order.
finalMerge = finalMerge.sort_values(by=['patientunitstayid', 'observationoffset'])
finalMerge

In [None]:
# The per-source offset columns have been folded into observationoffset, so
# remove them from the table.
offset_helpers = ['labresultoffset', 'intakeoutputoffset']
finalMerge = finalMerge.drop(columns=offset_helpers)
finalMerge

In [None]:
# Read the diagnosis table for the selected split.
diagnosis_csv = '../eICU' + database_type + '/diagnosis.csv'
diagnosis = pd.read_csv(diagnosis_csv)

In [None]:
# Store the `diagnosis` column as integers. An integer dtype cannot hold
# missing values, so this raises if the column contains NaN.
diagnosis_as_int = diagnosis['diagnosis'].astype(int)
diagnosis = diagnosis.assign(diagnosis=diagnosis_as_int)

In [None]:
# Show the dtype of every column to confirm the integer cast took effect.
diagnosis.dtypes

In [None]:
# Attach the diagnosis table to the measurement rows by patient stay id.
# The outer join keeps stays that appear in only one of the two frames.
merge2 = finalMerge.merge(diagnosis, how='outer', on=['patientunitstayid'])

In [None]:
# Display the result of joining the measurements with the diagnosis table.
merge2

In [None]:
# Distinct patient stay ids present in the merged frame, in ascending order.
distinct_stay_ids = set(merge2['patientunitstayid'])
idList = sorted(distinct_stay_ids)

In [None]:
# Empty frame that will receive the gap-filled rows of every patient.
nfinalMerge = pd.DataFrame()
# Independent working copy, so the interpolation step never touches merge2.
ff = merge2.copy(deep=True)

In [None]:
# Fill the gaps in every measurement one patient stay at a time, so values are
# never interpolated across two different patients. The result is rebuilt from
# scratch on every run, so re-executing this cell does not duplicate rows.
measurement_columns = [
    'temperature', 'heartrate', 'respiration', 'systemicsystolic',
    'creatinine', 'wbcx1000', 'lactate', 'urineoutputbyweight',
]

# Per-patient frames are collected in a list and concatenated once at the end:
# DataFrame.append no longer exists in pandas 2.0+, and growing a frame inside
# the loop re-copies all accumulated rows on every iteration.
patient_frames = []
# groupby(sort=True) yields the stays in ascending id order (the order of the
# sorted idList) in a single pass, instead of re-scanning ff once per id.
for _, patient_rows in ff.groupby('patientunitstayid', sort=True):
    # Explicit copy: the assignments below must not write into a slice of ff.
    patient_rows = patient_rows.copy()
    for column in measurement_columns:
        # limit_direction='both' also fills leading and trailing gaps.
        patient_rows[column] = patient_rows[column].interpolate(method='linear', limit_direction='both')
    # diagnosis keeps the default interpolation direction; anything still
    # missing becomes the number 0 (not the string '0', which would mix text
    # into a numeric column and split value_counts into separate buckets).
    patient_rows['diagnosis'] = patient_rows['diagnosis'].interpolate(method='linear').fillna(0)
    patient_frames.append(patient_rows)

# With no patients at all, fall back to an empty frame with ff's columns.
nfinalMerge = pd.concat(patient_frames) if patient_frames else ff.iloc[0:0].copy()

In [None]:
# Count how many rows carry each diagnosis value after gap filling.
diagnosis_values = nfinalMerge['diagnosis']
diagnosis_values.value_counts()

In [None]:
# Write the gap-filled table to finalData.csv without the index column.
# NOTE(review): the inputs are read from '../eICU' + database_type, but this
# writes to '../../eICU/training/' (one directory level higher, split name
# hard-coded) -- confirm the output location is intended.
path = "../../eICU/training/"
output_csv = path + "finalData.csv"
nfinalMerge.to_csv(output_csv, index=False, sep=',', encoding='utf-8')

In [None]:
# NOTE(review): this cell repeats an earlier interpolation cell of this
# notebook and is a candidate for removal. It rebuilds nfinalMerge from scratch
# rather than appending to it, so running it after the earlier cell no longer
# stores every row twice.
#
# Gaps in every measurement are filled one patient stay at a time, so values
# are never interpolated across two different patients.
gap_filled_columns = [
    'temperature', 'heartrate', 'respiration', 'systemicsystolic',
    'creatinine', 'wbcx1000', 'lactate', 'urineoutputbyweight',
]

# Collect the per-patient frames and concatenate once: DataFrame.append no
# longer exists in pandas 2.0+, and appending inside the loop re-copies all
# accumulated rows on every iteration.
filled_frames = []
# groupby(sort=True) visits the stays in ascending id order (the order of the
# sorted idList) in a single pass over ff.
for _, stay_rows in ff.groupby('patientunitstayid', sort=True):
    # Explicit copy: the assignments below must not write into a slice of ff.
    stay_rows = stay_rows.copy()
    for column in gap_filled_columns:
        # limit_direction='both' also fills leading and trailing gaps.
        stay_rows[column] = stay_rows[column].interpolate(method='linear', limit_direction='both')
    # diagnosis keeps the default interpolation direction; anything still
    # missing becomes the number 0 (not the string '0', which would mix text
    # into a numeric column).
    stay_rows['diagnosis'] = stay_rows['diagnosis'].interpolate(method='linear').fillna(0)
    filled_frames.append(stay_rows)

# With no patients at all, fall back to an empty frame with ff's columns.
nfinalMerge = pd.concat(filled_frames) if filled_frames else ff.iloc[0:0].copy()

In [None]:
# Frequency of each diagnosis value in the gap-filled table.
# NOTE(review): repeats an earlier cell of this notebook.
diagnosis_column = nfinalMerge['diagnosis']
diagnosis_column.value_counts()

In [None]:
# Save the gap-filled table as finalData.csv (index column omitted).
# NOTE(review): repeats an earlier export cell and overwrites the same file.
# The target '../../eICU/training/' also differs from the '../eICU' +
# database_type location the inputs are read from -- confirm it is intended.
path = "../../eICU/training/"
final_csv = path + "finalData.csv"
nfinalMerge.to_csv(final_csv, encoding='utf-8', sep=',', index=False)