## Import modules and data

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Location of the HDF5 file that both tables below are read from.
filename = '../data/original/all_hourly_data.h5'

# Read the two tables from the same file, one per key; the generator is
# consumed left to right, so the vitals table is still read first.
df_vitals, df_inter = (
    pd.read_hdf(filename, key=table_key)
    for table_key in ('vitals_labs_mean', 'interventions')
)

In [3]:
# Lookup table read from CSV and keyed by (subject_id, hadm_id, icustay_id) so
# it can be joined onto frames whose index carries those levels.
# NOTE(review): presumably this table supplies the `pat_id` column used as the
# new row key further down -- confirm against the CSV.
multi_to_patid = pd.read_csv('../data/processed/multi_to_patid.csv').set_index(
    keys=['subject_id', 'hadm_id', 'icustay_id']
)

## Prepare vitals

In [4]:
# Discard the second level of the column MultiIndex.
df_vitals.columns = df_vitals.columns.droplevel(1)

# Join the lookup table on the shared index levels, then re-key the rows by
# (pat_id, hours_in) and remove the original identifier columns.
df_vitals = (
    df_vitals.join(multi_to_patid)
    .reset_index()
    .set_index(['pat_id', 'hours_in'])
    .drop(columns=['subject_id', 'hadm_id', 'icustay_id'])
)

### Interpolate Null Values

In [5]:
# Linearly interpolate missing values along the rows, separately for each
# group of index level 0 (pat_id after the re-indexing above), filling in both
# directions.
# group_keys=False keeps the (pat_id, hours_in) index unchanged: on
# pandas >= 2.0 the default group_keys=True prepends the group key as an extra
# index level even for transform-like apply results, which would yield a
# duplicated pat_id level (and a duplicated column in the exported CSVs).
df_vitals = df_vitals.groupby(level=0, group_keys=False).apply(
    lambda pat: pat.interpolate(method='linear', axis=0, limit_direction='both')
)

# Anything still missing after per-group interpolation falls back to the
# column mean.
# NOTE(review): the mean is computed over every row, i.e. before the
# train/test split performed later in this file -- confirm that using test
# rows in the fill value is acceptable.
df_vitals = df_vitals.apply(lambda col: col.fillna(col.mean()), axis=0)

## Prepare interventions

In [6]:
# Join the lookup table on the shared index levels, then re-key the rows by
# (pat_id, hours_in) and remove the original identifier columns.
df_inter = (
    df_inter.join(multi_to_patid)
    .reset_index()
    .set_index(['pat_id', 'hours_in'])
    .drop(columns=['subject_id', 'hadm_id', 'icustay_id'])
)

## Split & Save Results

In [7]:
# Base directory for processed data; it is joined with the split index files
# and with the 'train/' and 'test/' output paths in the cells below.
processed_dir = '../data/processed/'

# File names used for the exported vitals and interventions tables.
csv_vitals_filename, csv_inters_filename = 'vitals.csv', 'interventions.csv'

In [8]:
# Load the stored train and test index arrays from the processed directory
# (train first, then test).
train_idxs, test_idxs = (
    np.load(os.path.join(processed_dir, split_file))
    for split_file in ('train_idxs.npy', 'test_idxs.npy')
)

In [9]:
# Select the vitals rows for each split by label and take independent copies,
# so later changes to one split do not touch df_vitals.
df_vit_train, df_vit_test = (
    df_vitals.loc[split_ids].copy() for split_ids in (train_idxs, test_idxs)
)

In [10]:
# Select the intervention rows for each split by label and take independent
# copies, so later changes to one split do not touch df_inter.
df_int_train, df_int_test = (
    df_inter.loc[split_ids].copy() for split_ids in (train_idxs, test_idxs)
)

In [15]:
# Export the training vitals as CSV under <processed_dir>/train/.
# The directory is created first (no-op if it already exists): to_csv does not
# create missing parent directories and would otherwise raise.
vit_train_dir = os.path.join(processed_dir, 'train/')
os.makedirs(vit_train_dir, exist_ok=True)
df_vit_train.to_csv(os.path.join(vit_train_dir, csv_vitals_filename))

In [17]:
# Export the test vitals as CSV under <processed_dir>/test/.
# The directory is created first (no-op if it already exists): to_csv does not
# create missing parent directories and would otherwise raise.
vit_test_dir = os.path.join(processed_dir, 'test/')
os.makedirs(vit_test_dir, exist_ok=True)
df_vit_test.to_csv(os.path.join(vit_test_dir, csv_vitals_filename))

In [18]:
# Export the intervention splits as CSV under <processed_dir>/train/ and
# <processed_dir>/test/. Each directory is created first (no-op if it already
# exists): to_csv does not create missing parent directories and would
# otherwise raise.
for split_subdir, split_frame in (('train/', df_int_train), ('test/', df_int_test)):
    split_dir = os.path.join(processed_dir, split_subdir)
    os.makedirs(split_dir, exist_ok=True)
    split_frame.to_csv(os.path.join(split_dir, csv_inters_filename))