In [1]:
!mkdir -p data
!mkdir -p data_input

## Extract the same vital sign features for WSRT vitals from flowsheet

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 

In [3]:
from tqdm import tqdm
tqdm.pandas()

  from pandas import Panel


# Train 

In [4]:
vitals = {
    307788:'respiratoryrate',
    307927:'spo2',
}

# min - 21, max = 100 
def calc_fio2(val):
    return max(21, min(100, 24 + (float(val) - 1) * 4))

def get_vitals_baseline(df):
    
    # repirate and spo02 
    other_vitals = df[df['observationtermid'].isin(vitals)]
    
    # fio02 directly from data 
    fio2_orig = df[df['observationtermid'].isin([307996, 308615])]

    # fio2 calculated 
    fio2_row = df[df['observationtermid'] == 307928]
    # exclude those with fio2_cols value < 0.5, because this converts to < 21 which is not possible? 
    fio2_row = fio2_row[fio2_row['result'].astype(float) >= 0.5]
    
    assert(min(fio2_row['result'].astype(float)) == 0.5)
    fio2_row['result'] = fio2_row['result'].apply(calc_fio2)
    assert(np.min(fio2_row['result']) == 22)
    room_air = df[(df['observationtermid'] == 307923) & (df['result'] == 'None (Room air)')]
    room_air['result'] = 21
    
    df = pd.concat([fio2_orig, fio2_row, room_air, other_vitals])
    return df 

In [5]:
# test
df = pd.read_pickle('data_win/flow-2020_0701-win.p')
test = get_vitals_baseline(df)

In [6]:
# remove duplicated records 
test = test.drop_duplicates(['hosp_id', 't', 'observationtermid'], keep='first')

In [7]:
# concatenate and finish before running through FIDDLE 
df_out = pd.concat([test])

In [8]:
df_pivoted = df_out.set_index(['hosp_id', 'window_id', 'window_t', "t", 'raw_t', 'observationtermid']).unstack()

In [9]:
df_pivoted.columns = df_pivoted.columns.droplevel(0)

In [10]:
df_pivoted.to_pickle('data/4baseline-wide.p')

### Format IDs

In [11]:
df_pivoted = pd.read_pickle('data/4baseline-wide.p')

In [12]:
# get ID column from windows_map 
win_test = pd.read_csv('data_win/windows_map.csv')
enc_test = pd.read_csv('data_win/enc.csv')

In [13]:
# only grab age from encounters
df_enc = pd.concat([enc_test])[["age"]]
df_win = pd.concat([win_test])

In [14]:
df_pivoted.columns.name = 'observationtermid'

In [15]:
df_vitals = df_pivoted.stack().rename('value').reset_index()

In [16]:
df_win.set_index(['hosp_id', 'window_id'])

Unnamed: 0_level_0,Unnamed: 1_level_0,window_start,window_end,ID
hosp_id,window_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
63990,0,0,240,63990-____0
63990,1,240,480,63990-____1
63990,2,480,720,63990-____2
63990,3,720,960,63990-____3
63990,4,960,1200,63990-____4
...,...,...,...,...
74168,303,72720,72960,74168-__303
74168,304,72960,73200,74168-__304
74168,305,73200,73440,74168-__305
74168,306,73440,73680,74168-__306


In [17]:
df_vitals_ID = df_vitals.set_index(['hosp_id', 'window_id']).join(df_win.set_index(['hosp_id', 'window_id'])[['ID']]).reset_index()

In [18]:
df_vitals_ID.to_pickle('data_input/4baseline_ID.p')

In [19]:
df = df_vitals_ID[['ID', 'window_t', 'observationtermid', 'value']] \
.rename(columns={'window_t': 't', 'observationtermid': 'variable_name', 'value': 'variable_value'})

In [20]:
df.variable_name[df.variable_name.isin([307923, 307996, 308615])] = 307928

vitals = {
    307788:'respiratoryrate',
    307927:'spo2',
    307928: 'fio2'
}


for val, name in vitals.items():
    df.variable_name[df.variable_name == val] = name


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


In [21]:
df.to_pickle('data_input/4baseline.p')

## Apply FIDDLE

In [24]:
!mkdir out_4baseline

In [25]:
!python FIDDLE-transform-4baseline.py

Input data file: ./data_input/4baseline.p

Input arguments:
    T      = 240
    dt     = 240.0
    θ₁     = 0.001
    θ₂     = 0.001
    θ_freq = 1.0
    k      = 3 ['min', 'max', 'mean']
binarize = yes

N = 459902
L = 1

Done unstacking
Done reindexing
Time elapsed: 2.717582 seconds
Done! Time elapsed: 3.247810 seconds


# Load back in 

In [26]:
import joblib 
df = joblib.load("out_4baseline/sdf.joblib")

In [None]:
df