In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
import pandas as pd
# import numpy as np

In [22]:
database_type = "/training"

The `patient` table includes general information about the patient admissions (for example, demographics, admission and discharge details). 
See: http://eicu-crd.mit.edu/eicutables/patient/

In [23]:
# loads patient table
patient = pd.read_csv('../eICU' + database_type + '/patient.csv', nrows=999999) 
patient = patient[['patientunitstayid', 'admissionweight']].set_index('patientunitstayid')
patient

Unnamed: 0_level_0,admissionweight
patientunitstayid,Unnamed: 1_level_1
141168,84.3
141178,54.4
141179,176.4
141194,73.9
141196,194.7
...,...
3353235,90.0
3353237,78.4
3353251,102.0
3353254,83.9


The `vitalperiodic` table comprises data that is consistently interfaced from bedside vital signs monitors into eCareManager. 
Data are generally interfaced as 1 minute averages, and archived into the `vitalperiodic` table as 5 minute median values. 
For more detail, see: http://eicu-crd.mit.edu/eicutables/vitalPeriodic/

In [24]:
vitalperiodic = pd.read_csv('../eICU' + database_type + '/vitalPeriodic.csv', nrows=999999)
columns = ['patientunitstayid', 'observationoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic']
vitalperiodic = vitalperiodic[columns]
vitalperiodic = vitalperiodic.sort_values(by='observationoffset')
vitalperiodic

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic
653065,148349,-1445,,60.0,20.0,
653071,148349,-1440,,60.0,21.0,
790455,150049,-1435,,96.0,19.0,
479374,146418,-1435,,71.0,23.0,
653145,148349,-1435,,60.0,21.0,
...,...,...,...,...,...,...
273524,144297,71778,,72.0,33.0,
273140,144297,71783,,72.0,27.0,
273956,144297,71788,,72.0,34.0,
275114,144297,71793,,72.0,33.0,


The `lab` table provides Laboratory tests that have have been mapped to a standard set of measurements. 
Unmapped measurements are recorded in the customLab table.
See: http://eicu-crd.mit.edu/eicutables/lab/

In [51]:
finallab = pd.read_csv('../eICU' + database_type + '/lab.csv', nrows=999999)

In [52]:
intakeOutputUrine = pd.read_csv('../eICU' + database_type + '/intakeOutputUrine.csv', nrows=999999)

In [53]:
intakeOutputUrine

Unnamed: 0,patientunitstayid,intakeoutputoffset,urineoutputbyweight
0,141179,1420,3.968254
1,141179,933,2.834467
2,141179,1703,5.668934
3,141179,678,3.968254
4,141194,12201,1.353180
...,...,...,...
999994,1105948,150,1.557632
999995,1105948,1230,0.778816
999996,1105948,1170,2.336449
999997,1105948,510,3.894081


In [54]:
# Merging Final Training Table
merge1 = pd.merge(vitalperiodic, finallab,  how='outer', left_on=['patientunitstayid','observationoffset'], right_on = ['patientunitstayid','labresultoffset'])
finalMerge = pd.merge(merge1, intakeOutputUrine,  how='outer', left_on=['patientunitstayid','observationoffset'], right_on = ['patientunitstayid','intakeoutputoffset'])

finalMerge = finalMerge[['patientunitstayid', 'observationoffset', 'labresultoffset', 'intakeoutputoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic', 'creatinine', 'wbcx1000', 'lactate', 'urineoutputbyweight']]
finalMerge = finalMerge.sort_values(by=['patientunitstayid', 'observationoffset'])
finalMerge


Unnamed: 0,patientunitstayid,observationoffset,labresultoffset,intakeoutputoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight
34160,141168,119.0,,,,140.0,,,,,,
35433,141168,124.0,,,,140.0,,,,,,
37167,141168,129.0,,,,140.0,,,,,,
38458,141168,134.0,,,,140.0,,,,,,
40027,141168,139.0,,,,140.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1999231,2848042,,7069.0,,,,,,0.83,,,
1999232,2848042,,8294.0,,,,,,0.72,,,
1999233,2848042,,5564.0,,,,,,0.61,,,
1999234,2848042,,2040.0,,,,,,0.69,8.4,,


In [55]:
finalMerge['observationoffset'] = finalMerge.apply(lambda x: x['observationoffset'] if pd.notna(x['observationoffset']) else (x['labresultoffset'] if pd.notna(x['labresultoffset']) else x['intakeoutputoffset']), axis = 1)
finalMerge

Unnamed: 0,patientunitstayid,observationoffset,labresultoffset,intakeoutputoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight
34160,141168,119.0,,,,140.0,,,,,,
35433,141168,124.0,,,,140.0,,,,,,
37167,141168,129.0,,,,140.0,,,,,,
38458,141168,134.0,,,,140.0,,,,,,
40027,141168,139.0,,,,140.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1999231,2848042,7069.0,7069.0,,,,,,0.83,,,
1999232,2848042,8294.0,8294.0,,,,,,0.72,,,
1999233,2848042,5564.0,5564.0,,,,,,0.61,,,
1999234,2848042,2040.0,2040.0,,,,,,0.69,8.4,,


In [56]:
finalMerge = finalMerge.drop(['labresultoffset', 'intakeoutputoffset'], axis = 1)
finalMerge

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight
34160,141168,119.0,,140.0,,,,,,
35433,141168,124.0,,140.0,,,,,,
37167,141168,129.0,,140.0,,,,,,
38458,141168,134.0,,140.0,,,,,,
40027,141168,139.0,,140.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...
1999231,2848042,7069.0,,,,,0.83,,,
1999232,2848042,8294.0,,,,,0.72,,,
1999233,2848042,5564.0,,,,,0.61,,,
1999234,2848042,2040.0,,,,,0.69,8.4,,


In [None]:
# vitalperiodic = vitalperiodic.sort_values(by=['observationoffset'])
# finallab = finallab.sort_values(by=['labresultoffset'])
# intakeOutputUrine = intakeOutputUrine.sort_values(by=['intakeoutputoffset'])
# merge1 = pd.merge_asof(vitalperiodic, finallab,  by='patientunitstayid', left_on=['observationoffset'], right_on = ['labresultoffset'], direction='nearest')
# finalMerge = pd.merge_asof(merge1, intakeOutputUrine,  by='patientunitstayid', left_on=['observationoffset'], right_on = ['intakeoutputoffset'], direction='nearest')

# finalMerge = finalMerge[['patientunitstayid', 'observationoffset', 'labresultoffset', 'intakeoutputoffset', 'temperature', 'heartrate', 'respiration', 'systemicsystolic', 'creatinine', 'wbcx1000', 'lactate', 'urineoutputbyweight']]
# finalMerge = finalMerge.sort_values(by=['patientunitstayid', 'observationoffset'])
# finalMerge

In [57]:
idList = sorted(set(list(finalMerge['patientunitstayid'])))

In [58]:
ff = finalMerge.copy()
nfinalMerge = pd.DataFrame()

In [59]:
for id in idList[:51]:
    df = ff[ff['patientunitstayid'] == id]
    df.loc[:, 'temperature'] = df['temperature'].interpolate(method='linear', limit_direction='both')
    df.loc[:, 'heartrate'] = df['heartrate'].interpolate(method='linear', limit_direction='both')
    df.loc[:, 'respiration'] = df['respiration'].interpolate(method='linear', limit_direction='both')
    df.loc[:, 'systemicsystolic'] = df['systemicsystolic'].interpolate(method='linear', limit_direction='both')
    df.loc[:, 'creatinine'] = df['creatinine'].interpolate(method='linear', limit_direction='both')
    df.loc[:, 'wbcx1000'] = df['wbcx1000'].interpolate(method='linear', limit_direction='both')
    df.loc[:, 'lactate'] = df['lactate'].interpolate(method='linear', limit_direction='both')
    df.loc[:, 'urineoutputbyweight'] = df['urineoutputbyweight'].interpolate(method='linear', limit_direction='both')
    print(df)
    print(df['lactate'].interpolate(method='linear', limit_direction='both').shape)
    nfinalMerge = nfinalMerge.append(df)
    print("next")    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


         patientunitstayid  observationoffset  temperature  heartrate  \
34160               141168              119.0          NaN      140.0   
35433               141168              124.0          NaN      140.0   
37167               141168              129.0          NaN      140.0   
38458               141168              134.0          NaN      140.0   
40027               141168              139.0          NaN      140.0   
...                    ...                ...          ...        ...   
496538              141168             2314.0          NaN      100.0   
1000002             141168             1701.0          NaN      100.0   
1000003             141168             2026.0          NaN      100.0   
1000004             141168             1133.0          NaN      100.0   
1000005             141168              516.0          NaN      100.0   

         respiration  systemicsystolic  creatinine  wbcx1000  lactate  \
34160            NaN              40.0      173.12

In [60]:
# groupeddf = finalMerge['temperature'].groupby('patientunitstayid')
# for i in groupeddf['patientunitstayid', 'temperature']:
#     if(i[0] == ['patientunitstayid', 'temperature']):    
#         print(i)
# #     print("next group")

In [61]:
nfinalMerge

Unnamed: 0,patientunitstayid,observationoffset,temperature,heartrate,respiration,systemicsystolic,creatinine,wbcx1000,lactate,urineoutputbyweight
34160,141168,119.0,,140.0,,40.0,173.12,19.8,12.2,
35433,141168,124.0,,140.0,,40.0,173.12,19.8,12.2,
37167,141168,129.0,,140.0,,40.0,173.12,19.8,12.2,
38458,141168,134.0,,140.0,,40.0,173.12,19.8,12.2,
40027,141168,139.0,,140.0,,40.0,173.12,19.8,12.2,
...,...,...,...,...,...,...,...,...,...,...
508268,141556,2405.0,,70.0,17.0,,,,,
508959,141556,2410.0,,68.0,17.0,,,,,
509539,141556,2415.0,,68.0,18.0,,,,,
510150,141556,2420.0,,0.0,17.0,,,,,
