In [1]:
!mkdir -p data
!mkdir -p data_input

## Extract the same vital sign features for WSRT vitals from flowsheet

In [2]:
import pandas as pd
import numpy as np

In [3]:
from tqdm import tqdm
tqdm.pandas()

  from pandas import Panel


In [4]:
# Load enc.csv and windows.csv, each indexed by `hosp_id`.
# NOTE(review): df_pop is not referenced again in the cells visible here.
df_enc, df_pop = (
    pd.read_csv(csv_path).set_index('hosp_id')
    for csv_path in ('./data_win/enc.csv', './data_win/windows.csv')
)

In [5]:
# Load the pickled input frame (presumably the windowed flowsheet observations — confirm).
# NOTE(review): unpickling can execute arbitrary code stored in the file; only
# load pickles that come from a trusted source.
df = pd.read_pickle('./data_win/flow-2020_0701-win.p')

In [6]:
# Observation term id -> vital-sign name. Used both to filter the raw rows
# and to rename the pivoted columns.
vitals = {
    # Single-value vitals
    307792: 'temperature',
    307791: 'heartrate',
    307788: 'respiratoryrate',
    307834: 'sbp',
    307836: 'dbp',
    307927: 'spo2',
    355405: 'gcs',
    315753: 'rass',

    # Combined blood-pressure readings; these two ids share one label and are
    # split into sbp/dbp before the columns are renamed.
    309233: 'sbp&dbp',
    307793: 'sbp&dbp',
}

In [None]:
# Preview the frame loaded from the pickle.
df

In [8]:
# Keep only rows whose observation term id is one of the keys of `vitals`.
is_vital = df['observationtermid'].isin(list(vitals))
df_out = df.loc[is_vital]

In [None]:
# Preview the rows kept by the observation-id filter.
df_out

In [10]:
# Keep the first record for each (hosp_id, t, observationtermid) combination.
# NOTE(review): window_id is not part of the key — confirm that a reading is
# never meant to appear in more than one window, otherwise the extra copies are
# discarded here.
dedup_keys = ['hosp_id', 't', 'observationtermid']
df_out = df_out.drop_duplicates(subset=dedup_keys, keep='first')

In [11]:
# Index by encounter, window and time columns plus the observation id, then
# move the observation id (the innermost index level) into the columns.
pivot_keys = ['hosp_id', 'window_id', 'window_t', "t", 'raw_t', 'observationtermid']
df_pivoted = df_out.set_index(pivot_keys).unstack(level='observationtermid')

In [12]:
# Drop the outer column level left by unstack() so columns are labelled by
# observation id only.
# NOTE(review): assumes exactly one value column remained after set_index; with
# more than one, the id labels would repeat — confirm.
df_pivoted.columns = df_pivoted.columns.droplevel(0)

In [13]:
# Blood-pressure clean-up, step 1: the two combined-reading columns hold
# '/'-separated strings; replace each string with the list of its parts.
for combined_id in (309233, 307793):
    df_pivoted[combined_id] = df_pivoted[combined_id].str.split('/').values

In [14]:
def _bp_component(row, position, fallback_id):
    """Pick one side of a blood-pressure reading out of ``row`` as a float.

    The combined-reading columns are consulted in the same priority order as
    before (307793 first, then 309233). A combined value is used only when it
    is a list that has an element at ``position`` and that element parses as a
    float; otherwise the next source is tried. If neither combined column
    yields a number, the dedicated column ``fallback_id`` is converted with
    ``float``.

    Args:
        row: Record indexable by observation id (e.g. the Series passed by
            ``DataFrame.apply(axis=1)``, or a plain dict).
        position: Index into the split list: 0 for the first part, 1 for the
            second part.
        fallback_id: Observation id of the dedicated single-value column.

    Returns:
        The selected reading as a float (NaN when the fallback cell is NaN).

    Raises:
        ValueError, TypeError: If the fallback value itself cannot be
            converted by ``float``; this matches the previous behaviour.
    """
    for combined_id in (307793, 309233):
        parts = row[combined_id]
        # Missing cells are not lists; a reading without the separator splits
        # to a single-element list and has no second part.
        if not isinstance(parts, list) or len(parts) <= position:
            continue
        try:
            return float(parts[position])
        except (TypeError, ValueError):
            # e.g. '120/' splits to ['120', ''] — unusable, try the next source.
            continue
    return float(row[fallback_id])


def clean_up_sbp(row):
    """Return the systolic value for ``row`` as a float.

    Uses the first part of a combined reading (307793, then 309233) when one is
    available and parseable, otherwise the dedicated column 307834.
    """
    return _bp_component(row, 0, 307834)


def clean_up_dbp(row):
    """Return the diastolic value for ``row`` as a float.

    Uses the second part of a combined reading (307793, then 309233) when one
    is available and parseable, otherwise the dedicated column 307836.
    """
    return _bp_component(row, 1, 307836)

In [15]:
# Blood-pressure clean-up, step 2: overwrite the dedicated SBP column, then the
# dedicated DBP column, with the value resolved row by row from that column and
# the two combined-reading columns. The recorded output shows each row-wise
# pass taking several minutes.
for target_id, cleaner in ((307834, clean_up_sbp), (307836, clean_up_dbp)):
    df_pivoted[target_id] = df_pivoted[[target_id, 309233, 307793]].progress_apply(cleaner, axis=1)

100%|██████████| 1560098/1560098 [08:07<00:00, 3201.90it/s]
100%|██████████| 1560098/1560098 [07:39<00:00, 3393.12it/s]


In [16]:
# The combined-reading columns have been folded into sbp/dbp; drop them and
# relabel the remaining id columns with their vital names.
# Re-running this cell raises KeyError because the dropped columns are gone.
combined_bp_ids = [309233, 307793]
df_pivoted = (
    df_pivoted
    .drop(columns=combined_bp_ids)
    .rename(columns=vitals)
)

In [None]:
# Preview the wide table after the id columns were renamed to vital names.
df_pivoted

In [18]:
# Save the wide-format vitals table to data/vitals-wide.p.
df_pivoted.to_pickle('data/vitals-wide.p')

### Format IDs

In [19]:
# Read enc.csv again (same file and index as the load near the top of the
# notebook) and load the window map that carries the `ID` column joined below.
# NOTE(review): df_enc is not used in the remaining visible cells.
enc_csv, win_map_csv = './data_win/enc.csv', './data_win/windows_map.csv'
df_enc = pd.read_csv(enc_csv).set_index('hosp_id')
df_win = pd.read_csv(win_map_csv)

In [20]:
# Name the column axis so that stack() + reset_index() below emit this level as
# a column called 'observationtermid' (its values are now the vital names).
df_pivoted.columns.name = 'observationtermid'

In [21]:
# Back to long form: one row per index key combination and vital, with the
# reading in a `value` column.
# NOTE(review): stack() drops missing cells under its classic default — confirm
# against the pandas version in use.
df_vitals = (
    df_pivoted
    .stack()
    .rename('value')
    .reset_index()
)

In [None]:
# Preview the long-format vitals table.
df_vitals

In [23]:
# Attach the `ID` column from the window map, matching on (hosp_id, window_id).
# join() defaults to a left join, so rows without a match keep a missing ID.
window_key = ['hosp_id', 'window_id']
window_ids = df_win.set_index(window_key)[['ID']]
df_vitals_ID = (
    df_vitals
    .set_index(window_key)
    .join(window_ids)
    .reset_index()
)

In [24]:
# Save the long-format table, including the joined ID column, to data_input/vitals_ID.p.
df_vitals_ID.to_pickle('data_input/vitals_ID.p')

In [25]:
# Reduce to the four output columns and rename them to the
# (ID, t, variable_name, variable_value) layout. `window_t` becomes `t`.
# Note that this rebinds `df`, which held the raw input frame until now.
output_names = {
    'window_t': 't',
    'observationtermid': 'variable_name',
    'value': 'variable_value',
}
df = (
    df_vitals_ID[['ID', 'window_t', 'observationtermid', 'value']]
    .rename(columns=output_names)
)

In [26]:
# Save the final (ID, t, variable_name, variable_value) table to data_input/vitals.p.
df.to_pickle('data_input/vitals.p')

In [None]:
# Preview the final table. `df` here is the renamed long-format output, not the
# raw frame loaded from the pickle earlier in the notebook.
df