In [1]:
import pandas as pd
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

In [2]:
# Download the VitalDB track index; the preview below shows the columns
# caseid, tname (track name) and tid (track id).
df = pd.read_csv(filepath_or_buffer='https://api.vitaldb.net/trks')
df.head(n=5)

Unnamed: 0,caseid,tname,tid
0,1,BIS/BIS,fd869e25ba82a66cc95b38ed47110bf4f14bb368
1,1,BIS/EEG1_WAV,0aa685df768489a18a5e9f53af0d83bf60890c73
2,1,BIS/EEG2_WAV,ad13b2c39b19193c8ae4a2de4f8315f18d61a57e
3,1,BIS/EMG,2525603efe18d982764dbca457affe7a45e766a9
4,1,BIS/SEF,1c91aec859304840dec75acf4a35da78be0e8ef0


In [3]:
# Number of index rows per track name, most frequent first.
df.loc[:, 'tname'].value_counts()

Solar8000/HR                6388
Solar8000/PLETH_SPO2        6387
Solar8000/PLETH_HR          6387
Primus/CO2                  6363
Primus/SET_AGE              6362
                            ... 
EV1000/SQI                     1
Solar8000/ST_V5                1
Vigileo/ART_MBP                1
CardioQ/SVR                    1
Solar8000/VENT_FLOW_TRIG       1
Name: tname, Length: 208, dtype: int64

In [4]:
# Case ids that have an arterial (ART) vs. a non-invasive (NIBP) systolic pressure track.
art_set = set(df.loc[df['tname'] == 'Solar8000/ART_SBP', 'caseid'])
nibp_set = set(df.loc[df['tname'] == 'Solar8000/NIBP_SBP', 'caseid'])

In [5]:
# How many cases have both an ART_SBP and an NIBP_SBP track.
len(art_set.intersection(nibp_set))

3096

### SBP

In [6]:
# Count the index rows available for each systolic blood-pressure track.
for label, track in (('ART_SBP : ', 'Solar8000/ART_SBP'),
                     ('NIBP_SBP : ', 'Solar8000/NIBP_SBP')):
    print(label, (df['tname'] == track).sum())

ART_SBP :  3725
NIBP_SBP :  5752


* ART와 NIBP가 같이 있는 경우에는 ART를 우선시해서 사용하고, ART 값이 없는 경우에만 NIBP 값으로 대체한다.

### MBP

In [7]:
# Count the index rows available for each mean blood-pressure (MBP) track.
# The labels say MBP, so the MBP tracks must be queried; this cell previously
# counted the SBP tracks by mistake (copy-paste from the SBP cell).
print('ART_MBP : ', df['tname'].isin(['Solar8000/ART_MBP']).sum())
print('NIBP_MBP : ', df['tname'].isin(['Solar8000/NIBP_MBP']).sum())

ART_MBP :  3725
NIBP_MBP :  5752


### DBP

In [8]:
# Count the index rows available for each diastolic blood-pressure track.
for label, track in (('ART_DBP : ', 'Solar8000/ART_DBP'),
                     ('NIBP_DBP : ', 'Solar8000/NIBP_DBP')):
    print(label, (df['tname'] == track).sum())

ART_DBP :  3725
NIBP_DBP :  5752


### HR

In [9]:
# Count the index rows available for each candidate heart-rate track.
for track in ('Solar8000/HR', 'Solar8000/PLETH_HR', 'Vigilance/HR_AVG', 'CardioQ/HR'):
    print('{} : '.format(track), (df['tname'] == track).sum())

Solar8000/HR :  6388
Solar8000/PLETH_HR :  6387
Vigilance/HR_AVG :  53
CardioQ/HR :  29


### BT

In [10]:
# Count the index rows available for each body-temperature track.
for track in ('Solar8000/BT1', 'Solar8000/BT2'):
    print('{} : '.format(track), (df['tname'] == track).sum())

Solar8000/BT1 :  5889
Solar8000/BT2 :  864


### SpO2

In [11]:
# Count the index rows available for the SpO2 track.
spo2_track = 'Solar8000/PLETH_SPO2'
print('{}: '.format(spo2_track), (df['tname'] == spo2_track).sum())

Solar8000/PLETH_SPO2:  6387


### EtCO2

In [12]:
# Count the index rows available for each end-tidal CO2 track.
for track in ('Solar8000/ETCO2', 'Primus/ETCO2'):
    print('{}: '.format(track), (df['tname'] == track).sum())

Solar8000/ETCO2:  6243
Primus/ETCO2:  6340


### FiO2

In [13]:
# Count the index rows available for each candidate FiO2 track.
for track in ('Solar8000/FIO2', 'Solar8000/VENT_SET_FIO2', 'Primus/FIO2', 'Primus/SET_FIO2'):
    print('{}: '.format(track), (df['tname'] == track).sum())

Solar8000/FIO2:  6240
Solar8000/VENT_SET_FIO2:  2213
Primus/FIO2:  6336
Primus/SET_FIO2:  0


### STII

In [14]:
# Count the index rows available for the ST-segment (lead II) track.
st_track = 'Solar8000/ST_II'
print('{}: '.format(st_track), (df['tname'] == st_track).sum())

Solar8000/ST_II:  5979


In [15]:
# Show the index rows (one tid per case) for the Solar8000/HR track.
df.loc[df['tname'].eq('Solar8000/HR')]

Unnamed: 0,caseid,tname,tid
62,1,Solar8000/HR,6326f61f2b89f8afb550c102fd1b9c9e44249fe0
149,2,Solar8000/HR,6a4d5f0bfe719c853f927c4443d1e191235091d9
234,3,Solar8000/HR,32412e5c00538be1b90caa7afd9356ab3f958210
325,4,Solar8000/HR,e7b604a97b88f410b52e05f88182dd7861b645d8
422,5,Solar8000/HR,70bb5af22bd71a852ae47543ae12daad54090361
...,...,...,...
536745,6384,Solar8000/HR,19e25a739f102aa582b664fa6638534d1e1039c1
536828,6385,Solar8000/HR,b7b299e885c89876a5fd0d2dace0a1da96611adc
536910,6386,Solar8000/HR,7df4cf17c3d605f9df251722fc53158de3eb44f6
536994,6387,Solar8000/HR,0658a0b6a670fc1c75ed3966678e964a513bb5fb


In [16]:
# Download a single track by its tid (the Solar8000/HR track of case 1 listed above)
# and express every timestamp relative to the last sample (0 at the end, negative before).
tmp = pd.read_csv('https://api.vitaldb.net/' + '6326f61f2b89f8afb550c102fd1b9c9e44249fe0')
last_time = tmp['Time'].iloc[-1]
tmp['Time_diff'] = tmp['Time'] - last_time

## Get Data

In [20]:
def load_data(data, track_nm, window_start_sec=60*60, window_end_sec=60*30,
              base_url='https://api.vitaldb.net/'):
    """Download one track and summarise it over a window before the end of the record.

    Parameters
    ----------
    data : pandas.DataFrame
        Track-index rows for a single case; only the first ``tid`` value is used.
    track_nm : str
        Name of the value column in the downloaded track (e.g. ``'Solar8000/BT1'``).
    window_start_sec, window_end_sec : numeric, optional
        The window covers samples strictly between ``window_start_sec`` and
        ``window_end_sec`` before the last sample. The defaults reproduce the
        original hard-coded window (60*60 and 60*30).
        NOTE(review): assumes the ``Time`` column is in seconds -- confirm.
    base_url : str, optional
        Prefix that the ``tid`` is appended to in order to build the download URL.

    Returns
    -------
    pandas.DataFrame
        Two rows in a single column ``0``: row 0 is the mean and row 1 is the
        standard deviation (``Series.std``) of ``track_nm`` inside the window.
        Both are NaN when no sample falls inside the window.
    """
    tmp = pd.read_csv(base_url + data['tid'].iloc[0])
    # Time relative to the final sample: 0 at the end, negative before it.
    tmp['Time_diff'] = tmp['Time'] - tmp['Time'].iloc[-1]
    # Build the window mask once and reuse it for both statistics.
    in_window = (tmp['Time_diff'] < -window_end_sec) & (tmp['Time_diff'] > -window_start_sec)
    values = tmp.loc[in_window, track_nm]
    return pd.DataFrame([values.mean(), values.std()])

#### for loop..
from tqdm.notebook import tqdm

# Make progress_apply available on pandas groupby objects.
tqdm.pandas()

# Trial run on the first 10 BT1 index rows: one (mean, std) pair per case,
# returned in long form with columns caseid / cal_type / Value.
bt1_rows = df[df['tname'] == 'Solar8000/BT1'].head(10)
(
    bt1_rows
    .groupby(['caseid'])
    .progress_apply(lambda x: load_data(x, 'Solar8000/BT1'))
    .reset_index()
    .rename(columns={'level_1': 'cal_type', 0: 'Value'})
)

In [21]:
# Tracks to summarise per case. The order is kept as-is because it drives both
# the export loop and the column order of the merged table.
track_list = [
    'Solar8000/HR',          # HR
    'Solar8000/BT1',         # BT
    'Solar8000/PLETH_SPO2',  # SPO2
    'Solar8000/FIO2',        # FIO2
    'Solar8000/ST_II',       # ST_II
    'Solar8000/ETCO2',       # ETCO2
    'Solar8000/ART_SBP',     # SBP
    'Solar8000/NIBP_SBP',
    'Solar8000/NIBP_DBP',    # DBP
    'Solar8000/ART_DBP',
    'Solar8000/NIBP_MBP',    # MBP
    'Solar8000/ART_MBP',
]

In [None]:
from tqdm.notebook import tqdm

# For every track: download each case's data, compute its (mean, std) pair with
# load_data, and save the long-form result to './<short track name>.csv'.
for track_nm in track_list:
    tqdm.pandas()
    track_rows = df[df['tname'] == track_nm]
    per_case = track_rows.groupby(['caseid']).progress_apply(
        lambda x: load_data(x, track_nm=track_nm)
    )
    long_stats = per_case.reset_index().rename(columns={'level_1': 'cal_type', 0: 'Value'})
    long_stats.to_csv('./{}.csv'.format(track_nm.split('/')[1]), index=False)

HBox(children=(FloatProgress(value=0.0, max=6387.0), HTML(value='')))

# Reshape each per-track long CSV into one row per case with mean_/std_ columns
# and save it as './<short track name>_pivot.csv'.
tmp = pd.DataFrame()
for track_nm in track_list:
    short_nm = track_nm.split('/')[1]
    long_stats = pd.read_csv('./{}.csv'.format(short_nm))
    # cal_type 0 is the mean row and 1 is the std row produced by load_data.
    tmp = long_stats.pivot_table(values='Value', index='caseid', columns='cal_type')
    tmp.columns.name = None
    wide_stats = tmp.reset_index().rename(
        columns={0: 'mean_{}'.format(short_nm), 1: 'std_{}'.format(short_nm)}
    )
    wide_stats.to_csv('./{}_pivot.csv'.format(short_nm), index=False)

In [None]:
# Build one wide table (one row per caseid) holding mean_<track> / std_<track>
# columns for every track in track_list.
tmp = pd.DataFrame()
for i, track_nm in enumerate(track_list):
    track_nm = track_nm.split('/')[1]
    df_tmp = pd.read_csv('./{}.csv'.format(track_nm))
    # cal_type 0 is the mean row and 1 is the std row produced by load_data.
    df_tmp = pd.pivot_table(data=df_tmp, values='Value', index='caseid', columns='cal_type')
    df_tmp.columns.name = None
    df_tmp = df_tmp.reset_index().rename(columns={0: 'mean_{}'.format(track_nm), 1: 'std_{}'.format(track_nm)})
    if i == 0:
        # DataFrame.append was removed in pandas 2.0; the first frame simply
        # becomes the starting table.
        tmp = df_tmp
    else:
        # Name the join key explicitly so an unexpected shared column can never
        # silently become part of the merge key.
        tmp = pd.merge(tmp, df_tmp, how='outer', on='caseid')

In [None]:
# Preview the merged intra-operative table.
tmp.head(n=5)

In [None]:
# For each blood-pressure type, prefer the arterial (ART) statistic and fall back
# to the non-invasive (NIBP) one only where the ART value is missing.
bp_list = ['DBP', 'SBP', 'MBP']
for bp_type in bp_list:
    for stat in ('mean_', 'std_'):
        art_col = stat + 'ART_' + bp_type
        nibp_col = stat + 'NIBP_' + bp_type
        tmp[stat + bp_type] = tmp[art_col].fillna(tmp[nibp_col])

In [None]:
#tmp['mean_DBP'] = tmp['mean_ART_DBP']
#tmp.loc[tmp['mean_DBP'].isnull(), 'mean_DBP'] = tmp.loc[tmp['mean_DBP'].isnull(), 'mean_NIBP_DBP']
#tmp['std_DBP'] = tmp['std_ART_DBP']
#tmp.loc[tmp['std_DBP'].isnull(), 'std_DBP'] = tmp.loc[tmp['std_DBP'].isnull(), 'std_NIBP_DBP']

#tmp['mean_MBP'] = tmp['mean_ART_MBP']
#tmp.loc[tmp['mean_MBP'].isnull(), 'mean_MBP'] = tmp.loc[tmp['mean_MBP'].isnull(), 'mean_NIBP_MBP']
#tmp['std_MBP'] = tmp['std_ART_MBP']
#tmp.loc[tmp['std_MBP'].isnull(), 'std_MBP'] = tmp.loc[tmp['std_MBP'].isnull(), 'std_NIBP_MBP']

#tmp['mean_SBP'] = tmp['mean_ART_SBP']
#tmp.loc[tmp['mean_SBP'].isnull(), 'mean_SBP'] = tmp.loc[tmp['mean_SBP'].isnull(), 'mean_NIBP_SBP']
#tmp['std_SBP'] = tmp['std_ART_SBP']
#tmp.loc[tmp['std_SBP'].isnull(), 'std_SBP'] = tmp.loc[tmp['std_SBP'].isnull(), 'std_NIBP_SBP']

In [None]:
# Persist the merged intra-operative table (index not written).
tmp.to_csv(path_or_buf='./data_intraop.csv', index=False)

In [None]:
# Load the pre-operative table.
# NOTE(review): this rebinds `df`, which earlier cells use for the track index;
# re-running those cells after this one will operate on the wrong frame.
df = pd.read_csv(filepath_or_buffer='./data_preop_preprocessed.csv')
df.head(n=5)

In [None]:
# Attach the intra-operative statistics to every pre-operative row (left join on caseid).
df_final = df.merge(tmp, how='left', on='caseid')
df_final.head(n=5)

In [None]:
import numpy as np

# Binarise preop_pft: 0 when the text contains 'Normal', otherwise 1.
# NOTE(review): str.contains yields NaN for missing values -- confirm how those
# rows should be encoded (consider passing na=... explicitly).
# NOTE(review): re-running this cell fails, because the column is no longer text.
is_normal = df_final['preop_pft'].str.contains('Normal')
df_final['preop_pft'] = np.where(is_normal, 0, 1)

In [None]:
# Persist the final merged table (index not written).
df_final.to_csv(path_or_buf='./data_preprocessed.csv', index=False)