In [None]:
import numpy as np
import pandas as pd
import os, pickle, sys
from scipy import signal
import vitaldb

### Input configuration
SRATE = 250        # sampling rate of the signals fed to the model [Hz]
LEN_INPUT = 20     # length of a single input window [s]
LEN_PER_NRS = 60   # length of vital signal used for each NRS [s]
OVERLAP = 2        # multiplied by the window index to place each window start [s]
# Number of augmented input windows obtained from one NRS.
n_aug = (LEN_PER_NRS - LEN_INPUT) // OVERLAP + 1


# Locations of the pickled vital-sign data.
# NOTE(review): directory names suggest 2-minute PACU segments (PPG/ECG at
# 100 Hz, ECG at 250 Hz) -- confirm against the preprocessing pipeline.
vital_path = '../../cranberry2/Preprocessing/vital_data/PPG_100Hz_ECG_100Hz_pacu_2min/'
# `ecg_path` is a file-name *prefix* (it ends with 'ECG,'), not a directory:
# the per-recording file name is appended directly to it.
ecg_path = '../../cranberry2/Preprocessing/vital_data/ECG_250Hz_pacu_2min/ECG,'

# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# load caches that were produced by this project.
with open('../DL_model/cache/preprocess3/input3/df_preprocess_pacu', 'rb') as fh:
    df_preprocess_pacu = pickle.load(fh)

# Name of the matching '.vital' file for every '.csv' entry in 'file_path'.
df_preprocess_pacu['file_path_vital'] = [
    f_name.split('.csv')[0] + ".vital"
    for f_name in df_preprocess_pacu['file_path'].values.flatten()
]

### Case ids belonging to the test / train / validation splits
with open('../DL_model/caseid_test', 'rb') as fh:
    caseid_test = pickle.load(fh)
with open('../DL_model/caseid_train', 'rb') as fh:
    caseid_train = pickle.load(fh)
with open('../DL_model/caseid_val', 'rb') as fh:
    caseid_val = pickle.load(fh)


# Build the PACU test set.
#
# Outputs (module-level):
#   df_MBP           -- one row per test recording: 'file_path' plus the mean
#                       NIBP of every accepted window (columns '31'..'51').
#   x_test_MBP_pacu  -- mean NIBP of every accepted window.
#   y_test_MBP_pacu  -- float parsed from the part of 'file_path' before the
#                       first comma (presumably the NRS score -- TODO confirm).
#   x_MBP_test_pacu  -- [PPG window, ECG window] pair of every accepted window.
df_MBP = pd.DataFrame(columns=['file_path']+[str(i) for i in range(31,52)])
x_test_MBP_pacu, y_test_MBP_pacu = [], []
x_MBP_test_pacu = []

# Every recording is handled as a fixed-length buffer of 120 s at SRATE.
N_SAMPLES_PER_NRS = 120 * SRATE


def _nan_padded(samples, length):
    """Return a float array of exactly `length` samples.

    `samples` is truncated to `length` if it is longer and padded with NaN at
    the end if it is shorter.
    """
    clipped = samples[:length]
    padded = np.full(length, np.nan)
    padded[:len(clipped)] = clipped
    return padded


# Total number of test recordings, used only for the progress message.
n_test = sum(caseid in caseid_test for caseid in df_preprocess_pacu['caseids'])

cnt = 0
for _, row in df_preprocess_pacu.iterrows():
    # Only recordings whose case id belongs to the test split are used.
    if row['caseids'] not in caseid_test:
        continue

    cnt += 1
    print('loading data {}/{} ...'.format(cnt, n_test), end='')

    df_MBP.loc[cnt-1, 'file_path'] = row['file_path']

    # NIBP trace. It is loaded into its own variable so that the summary
    # table `df_MBP` is not overwritten by the raw per-recording data.
    # SECURITY: pickle.load must only be used on trusted, project-made files.
    with open('../../cranberry2/Preprocessing/vital_data/NIBP_250Hz_pacu_2min_pickle/'+row['file_path_vital'], 'rb') as fh:
        df_nibp = pickle.load(fh).reset_index()
    MBP_per_NRS = _nan_padded(df_nibp['NIBP'].tolist(), N_SAMPLES_PER_NRS)

    # PPG: fill gaps forward then backward, then resample to SRATE.
    with open(vital_path+'/'+row['file_path'], 'rb') as fh:
        df_vital = pickle.load(fh).reset_index()
    pleth_samp = df_vital[['Pleth']].ffill().bfill().values.flatten()
    pleth_resamp = signal.resample(pleth_samp, N_SAMPLES_PER_NRS)
    ppg_per_NRS = _nan_padded(pleth_resamp, N_SAMPLES_PER_NRS)

    # ECG: fill gaps the same way; no resampling is applied.
    with open(ecg_path+row['file_path'][:-3]+'vital', 'rb') as fh:
        ecg_samp = pickle.load(fh).reset_index()[['ECG']]
    ecg_samp = ecg_samp.ffill().bfill().values.flatten()
    ecg_per_NRS = _nan_padded(ecg_samp, N_SAMPLES_PER_NRS)

    # Go through the augmented windows of this recording. Window index i
    # corresponds to the preprocessing flag stored in column str(i+1).
    for i in range(30,51):
        # Skip windows that did not pass preprocessing.
        if not row[str(i+1)]:
            continue

        # The window starts i*OVERLAP seconds in and is LEN_INPUT seconds long.
        start_idx = i*OVERLAP*SRATE
        end_idx = (i*OVERLAP+LEN_INPUT)*SRATE

        # Mean NIBP over the window. np.mean (not nanmean) yields NaN when
        # any sample is missing, and NaN fails the `> 0` test below, so such
        # windows are dropped.
        # NOTE(review): presumably intentional -- confirm.
        MBP_mean = np.mean(MBP_per_NRS[start_idx:end_idx])
        if not MBP_mean > 0:
            continue

        # Input normalization: PPG is mean-centred, ECG is z-scored.
        # NOTE(review): a constant ECG window has zero std and produces
        # non-finite values; this case is not handled here.
        pleth_inp = ppg_per_NRS[start_idx:end_idx]
        pleth_inp = pleth_inp - np.nanmean(pleth_inp)

        ecg_inp = ecg_per_NRS[start_idx:end_idx]
        ecg_inp = (ecg_inp - np.nanmean(ecg_inp)) / np.nanstd(ecg_inp)

        MBP = MBP_mean
        df_MBP.loc[cnt-1, str(i+1)] = MBP

        x_test_MBP_pacu.append(MBP)
        y_test_MBP_pacu.append(float(row['file_path'].split(',')[0]))
        x_MBP_test_pacu.append([pleth_inp, ecg_inp])

    print('completed')

