In [1]:
import statsmodels.api as sm


def smooth(y):
    """Return the slowly varying trend of the 1-D signal ``y``.

    The trend is estimated with :func:`lowess` using its default span.
    A Savitzky-Golay filter (window_size=2001, order=3) was an earlier
    alternative that is no longer used.
    """
    trend = lowess(y)
    return trend

# Original author's note: a span of 0.2 gave the best removal (empirical).
def lowess(y, f=0.2):
    """Fit a LOWESS curve to ``y`` against its sample index.

    Parameters
    ----------
    y : sequence of float
        Signal values; the x-coordinates are taken to be 0..len(y)-1.
    f : float, optional
        Fraction of the data used for each local fit (passed as ``frac``).

    Returns
    -------
    numpy.ndarray
        The fitted values (second column of the statsmodels result).
        ``it=0`` disables the robustifying re-weighting iterations.
    """
    sample_index = np.arange(len(y))
    fitted = sm.nonparametric.lowess(y, sample_index, frac=f, it=0)
    # Column 0 holds the x-values, column 1 the smoothed y-values.
    return fitted[:, 1]

import numpy as np
import pandas as pd
import os, pickle, sys
from scipy import signal
#sys.path.append('DL_model')


### input settings
SRATE = 250  # sampling rate in Hz
LEN_INPUT = 20  # length of one input window in seconds (window = LEN_INPUT*SRATE samples)
LEN_PER_NRS = 60  # vital-sign length per NRS record, same unit as LEN_INPUT
OVERLAP = 2  # offset in seconds between the starts of consecutive windows
# Number of windows (data augmentation count) that fit into LEN_PER_NRS.
n_aug = (LEN_PER_NRS - LEN_INPUT) // OVERLAP + 1




# Directory that holds the cached model inputs (trailing slash is relied on
# by the string concatenations that build file names from it).
input_path = '../DL_model/dataset/preprocess4/input3/'
# makedirs also creates missing parent directories, and exist_ok avoids the
# check-then-create race that a separate os.path.exists test would have.
os.makedirs(input_path[:-1], exist_ok=True)

# Build the PACU model inputs, or reload them when a cached copy exists.
#
# Cached branch: loads the train / validation / test arrays from input_path.
# Build branch: for a slice of rows in df_preprocess_pacu, loads the PPG and
# ECG recordings, cuts them into overlapping windows, removes the LOWESS
# trend, normalises, pickles every window, and finally stores the collected
# arrays (plus age and gender) as compressed .npz files with a '3' suffix.

# Rows of df_preprocess_pacu handled by this run (1-based, inclusive).
FIRST_ROW = 3601
LAST_ROW = 5400
# Length of the per-record sample buffer: 120 s at SRATE (30000 at 250 Hz).
RECORD_SAMPLES = 120*SRATE
# Candidate windows per record; their pass/fail flags are columns '1'..'30'.
N_WINDOWS = 30


def _load_npz_array(file_name, allow_pickle=False):
    """Return the 'arr_0' array stored in ``input_path + file_name``."""
    with np.load(input_path+file_name, allow_pickle=allow_pickle) as archive:
        return archive['arr_0']


def _load_pickle(path):
    """Unpickle the object stored at ``path`` and close the file afterwards."""
    # NOTE(review): pickle can execute arbitrary code while loading; these
    # files are assumed to be trusted project artefacts.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _extract_window(record, start_idx, end_idx):
    """Copy ``record[start_idx:end_idx]`` and forward/backward-fill its NaNs."""
    window = np.copy(record[start_idx:end_idx])
    if np.isnan(window).any():
        window = pd.Series(window).ffill().bfill().values.flatten()
    return window


if os.path.exists(input_path+'x_train_pacu.npz'):
    print('loading train...', flush=True, end='')
    x_train_pacu = _load_npz_array('x_train_pacu.npz', allow_pickle=True)
    y_train_pacu = _load_npz_array('y_train_pacu.npz')
    # The validation arrays are needed by the summary printed at the end;
    # without them the cached path failed with a NameError.
    x_val_pacu = _load_npz_array('x_val_pacu.npz', allow_pickle=True)
    y_val_pacu = _load_npz_array('y_val_pacu.npz')
    x_test_pacu = _load_npz_array('x_test_pacu.npz', allow_pickle=True)
    y_test_pacu = _load_npz_array('y_test_pacu.npz')
    print('done', flush=True)

else:
    false_row_list_preop = []

    vital_path = '../../cranberry2/Preprocessing/vital_data/PPG_100Hz_ECG_100Hz_pacu_2min/'
    ecg_path = '../../cranberry2/Preprocessing/vital_data/ECG_250Hz_pacu_2min/ECG,'
    df_preprocess_pacu = _load_pickle('../DL_model/cache/preprocess3/input3/df_preprocess_pacu_agender')
    df_demograph = pd.read_csv('../DL_model/caseids_age_gender.csv')

    # Case ids that belong to the test / train / validation splits.
    caseid_test = _load_pickle('../DL_model/caseid_test_new')
    caseid_train = _load_pickle('../DL_model/caseid_train_new')
    caseid_val = _load_pickle('../DL_model/caseid_val_new')

    # Case ids (one entry per accepted window) that are in none of the splits.
    non_lis = []
    x_train_pacu, y_train_pacu = [], []
    x_test_pacu, y_test_pacu = [], []
    x_val_pacu, y_val_pacu = [], []
    age_train_pacu, gender_train_pacu = [], []
    age_test_pacu, gender_test_pacu = [], []
    age_val_pacu, gender_val_pacu = [], []

    # (x, y, age, gender) accumulators per split, so the append logic below
    # exists only once.
    split_lists = {
        'test': (x_test_pacu, y_test_pacu, age_test_pacu, gender_test_pacu),
        'val': (x_val_pacu, y_val_pacu, age_val_pacu, gender_val_pacu),
        'train': (x_train_pacu, y_train_pacu, age_train_pacu, gender_train_pacu),
    }

    for cnt, (_, row) in enumerate(df_preprocess_pacu.iterrows(), start=1):
        if cnt < FIRST_ROW:
            continue
        if cnt > LAST_ROW:
            break

        print('loading data {}/{} ...'.format(cnt, len(df_preprocess_pacu)), end='')
        row_demo = df_demograph[df_demograph['caseids']==row['caseids']].iloc[0]

        # Decide the split once per row; precedence is test, then val, then train.
        if row['caseids'] in caseid_test:
            targets = split_lists['test']
        elif row['caseids'] in caseid_val:
            targets = split_lists['val']
        elif row['caseids'] in caseid_train:
            targets = split_lists['train']
        else:
            targets = None

        # vital data - PPG (resampled from 100 Hz to SRATE)
        df_vital = _load_pickle(vital_path+row['file_path']).reset_index()
        pleth_samp = df_vital[['Pleth']].ffill().bfill().values.flatten()
        pleth_resamp = signal.resample(pleth_samp, RECORD_SAMPLES)
        ppg_per_NRS = np.full(RECORD_SAMPLES, np.nan)
        ppg_per_NRS[0:len(pleth_resamp)] = pleth_resamp

        # vital data - ECG (250 Hz); the file shares the PPG file's name with
        # its last three characters replaced by 'vital'.
        ecg_frame = _load_pickle(ecg_path+row['file_path'][:-3]+'vital').reset_index()[['ECG']]
        ecg_samp = ecg_frame.ffill().bfill().values.flatten()[0:RECORD_SAMPLES]
        # Shorter recordings leave NaN padding at the end of the buffer.
        ecg_per_NRS = np.full(RECORD_SAMPLES, np.nan)
        ecg_per_NRS[0:len(ecg_samp)] = ecg_samp

        save_path = '../../cranberry2/Preprocessing/cache/lowess_filtered/pacu_'+row['caseids']
        # Check each of the N_WINDOWS candidate windows of this NRS record.
        for i in range(0, N_WINDOWS):
            # Skip windows that did not pass the earlier preprocessing step.
            if not row[str(i+1)]:
                continue

            start_idx = i*OVERLAP*SRATE  # 500i with the current settings
            end_idx = (i*OVERLAP+LEN_INPUT)*SRATE  # 500i + 5000

            # PPG: remove the LOWESS trend, then centre on zero.
            ppg_inp = _extract_window(ppg_per_NRS, start_idx, end_idx)
            pleth_inp = ppg_inp - smooth(ppg_inp)
            pleth_inp = pleth_inp - np.nanmean(pleth_inp)

            # ECG: remove the LOWESS trend, then z-score.
            # NOTE(review): a perfectly flat window has zero std and would
            # yield NaN/inf here; presumably excluded by the preprocessing
            # flags - confirm.
            ecg_inp = _extract_window(ecg_per_NRS, start_idx, end_idx)
            ecg_inp = ecg_inp - smooth(ecg_inp)
            ecg_inp = (ecg_inp - np.nanmean(ecg_inp)) / np.nanstd(ecg_inp)

            with open(save_path+'_{}'.format(i), 'wb') as out_file:
                pickle.dump([pleth_inp, ecg_inp], out_file)

            if targets is None:
                non_lis.append(row['caseids'])
                continue

            x_list, y_list, age_list, gender_list = targets
            age_list.append(int(row_demo['age']))
            # Gender is encoded as 1 for 'F' and 0 for anything else.
            gender_list.append(1 if row_demo['gender']=='F' else 0)
            x_list.append([pleth_inp, ecg_inp])
            y_list.append(int(float(row['NRS'])))

        print('completed')

    x_train_pacu = np.array(x_train_pacu, np.float32)
    x_test_pacu = np.array(x_test_pacu, np.float32)
    y_train_pacu = np.array(y_train_pacu, int)
    y_test_pacu = np.array(y_test_pacu, int)
    x_val_pacu = np.array(x_val_pacu, np.float32)
    y_val_pacu = np.array(y_val_pacu, int)

    age_train_pacu = np.array(age_train_pacu, int)
    age_test_pacu = np.array(age_test_pacu, int)
    age_val_pacu = np.array(age_val_pacu, int)
    gender_train_pacu = np.array(gender_train_pacu, int)
    gender_test_pacu = np.array(gender_test_pacu, int)
    gender_val_pacu = np.array(gender_val_pacu, int)

    # Save the results of this run.
    print('saving...', end='', flush=True)
    arrays_to_save = (
        ('x_train_pacu3.npz', x_train_pacu),
        ('x_test_pacu3.npz', x_test_pacu),
        ('x_val_pacu3.npz', x_val_pacu),
        ('y_train_pacu3.npz', y_train_pacu),
        ('y_test_pacu3.npz', y_test_pacu),
        ('y_val_pacu3.npz', y_val_pacu),
        ('age_train_pacu3.npz', age_train_pacu),
        ('age_test_pacu3.npz', age_test_pacu),
        ('age_val_pacu3.npz', age_val_pacu),
        ('gender_train_pacu3.npz', gender_train_pacu),
        ('gender_test_pacu3.npz', gender_test_pacu),
        ('gender_val_pacu3.npz', gender_val_pacu),
    )
    for file_name, array in arrays_to_save:
        np.savez_compressed(input_path+file_name, array)

    print('done', flush=True)


print('size of training set(pacu):', len(x_train_pacu))
print('size of validation set(pacu):', len(x_val_pacu))
print('size of test set(pacu):', len(x_test_pacu))

loading data 3601/7253 ...completed
loading data 3602/7253 ...completed
loading data 3603/7253 ...completed
loading data 3604/7253 ...completed
loading data 3605/7253 ...completed
loading data 3606/7253 ...completed
loading data 3607/7253 ...completed
loading data 3608/7253 ...completed
loading data 3609/7253 ...completed
loading data 3610/7253 ...completed
loading data 3611/7253 ...completed
loading data 3612/7253 ...completed
loading data 3613/7253 ...completed
loading data 3614/7253 ...completed
loading data 3615/7253 ...completed
loading data 3616/7253 ...completed
loading data 3617/7253 ...completed
loading data 3618/7253 ...completed
loading data 3619/7253 ...completed
loading data 3620/7253 ...completed
loading data 3621/7253 ...completed
loading data 3622/7253 ...completed
loading data 3623/7253 ...completed
loading data 3624/7253 ...completed
loading data 3625/7253 ...completed
loading data 3626/7253 ...completed
loading data 3627/7253 ...completed
loading data 3628/7253 ...co

loading data 3828/7253 ...completed
loading data 3829/7253 ...completed
loading data 3830/7253 ...completed
loading data 3831/7253 ...completed
loading data 3832/7253 ...completed
loading data 3833/7253 ...completed
loading data 3834/7253 ...completed
loading data 3835/7253 ...completed
loading data 3836/7253 ...completed
loading data 3837/7253 ...completed
loading data 3838/7253 ...completed
loading data 3839/7253 ...completed
loading data 3840/7253 ...completed
loading data 3841/7253 ...completed
loading data 3842/7253 ...completed
loading data 3843/7253 ...completed
loading data 3844/7253 ...completed
loading data 3845/7253 ...completed
loading data 3846/7253 ...completed
loading data 3847/7253 ...completed
loading data 3848/7253 ...completed
loading data 3849/7253 ...completed
loading data 3850/7253 ...completed
loading data 3851/7253 ...completed
loading data 3852/7253 ...completed
loading data 3853/7253 ...completed
loading data 3854/7253 ...completed
loading data 3855/7253 ...co

loading data 4055/7253 ...completed
loading data 4056/7253 ...completed
loading data 4057/7253 ...completed
loading data 4058/7253 ...completed
loading data 4059/7253 ...completed
loading data 4060/7253 ...completed
loading data 4061/7253 ...completed
loading data 4062/7253 ...completed
loading data 4063/7253 ...completed
loading data 4064/7253 ...completed
loading data 4065/7253 ...completed
loading data 4066/7253 ...completed
loading data 4067/7253 ...completed
loading data 4068/7253 ...completed
loading data 4069/7253 ...completed
loading data 4070/7253 ...completed
loading data 4071/7253 ...completed
loading data 4072/7253 ...completed
loading data 4073/7253 ...completed
loading data 4074/7253 ...completed
loading data 4075/7253 ...completed
loading data 4076/7253 ...completed
loading data 4077/7253 ...completed
loading data 4078/7253 ...completed
loading data 4079/7253 ...completed
loading data 4080/7253 ...completed
loading data 4081/7253 ...completed
loading data 4082/7253 ...co

loading data 4282/7253 ...completed
loading data 4283/7253 ...completed
loading data 4284/7253 ...completed
loading data 4285/7253 ...completed
loading data 4286/7253 ...completed
loading data 4287/7253 ...completed
loading data 4288/7253 ...completed
loading data 4289/7253 ...completed
loading data 4290/7253 ...completed
loading data 4291/7253 ...completed
loading data 4292/7253 ...completed
loading data 4293/7253 ...completed
loading data 4294/7253 ...completed
loading data 4295/7253 ...completed
loading data 4296/7253 ...completed
loading data 4297/7253 ...completed
loading data 4298/7253 ...completed
loading data 4299/7253 ...completed
loading data 4300/7253 ...completed
loading data 4301/7253 ...completed
loading data 4302/7253 ...completed
loading data 4303/7253 ...completed
loading data 4304/7253 ...completed
loading data 4305/7253 ...completed
loading data 4306/7253 ...completed
loading data 4307/7253 ...completed
loading data 4308/7253 ...completed
loading data 4309/7253 ...co

loading data 4509/7253 ...completed
loading data 4510/7253 ...completed
loading data 4511/7253 ...completed
loading data 4512/7253 ...completed
loading data 4513/7253 ...completed
loading data 4514/7253 ...completed
loading data 4515/7253 ...completed
loading data 4516/7253 ...completed
loading data 4517/7253 ...completed
loading data 4518/7253 ...completed
loading data 4519/7253 ...completed
loading data 4520/7253 ...completed
loading data 4521/7253 ...completed
loading data 4522/7253 ...completed
loading data 4523/7253 ...completed
loading data 4524/7253 ...completed
loading data 4525/7253 ...completed
loading data 4526/7253 ...completed
loading data 4527/7253 ...completed
loading data 4528/7253 ...completed
loading data 4529/7253 ...completed
loading data 4530/7253 ...completed
loading data 4531/7253 ...completed
loading data 4532/7253 ...completed
loading data 4533/7253 ...completed
loading data 4534/7253 ...completed
loading data 4535/7253 ...completed
loading data 4536/7253 ...co

loading data 4736/7253 ...completed
loading data 4737/7253 ...completed
loading data 4738/7253 ...completed
loading data 4739/7253 ...completed
loading data 4740/7253 ...completed
loading data 4741/7253 ...completed
loading data 4742/7253 ...completed
loading data 4743/7253 ...completed
loading data 4744/7253 ...completed
loading data 4745/7253 ...completed
loading data 4746/7253 ...completed
loading data 4747/7253 ...completed
loading data 4748/7253 ...completed
loading data 4749/7253 ...completed
loading data 4750/7253 ...completed
loading data 4751/7253 ...completed
loading data 4752/7253 ...completed
loading data 4753/7253 ...completed
loading data 4754/7253 ...completed
loading data 4755/7253 ...completed
loading data 4756/7253 ...completed
loading data 4757/7253 ...completed
loading data 4758/7253 ...completed
loading data 4759/7253 ...completed
loading data 4760/7253 ...completed
loading data 4761/7253 ...completed
loading data 4762/7253 ...completed
loading data 4763/7253 ...co

loading data 4964/7253 ...completed
loading data 4965/7253 ...completed
loading data 4966/7253 ...completed
loading data 4967/7253 ...completed
loading data 4968/7253 ...completed
loading data 4969/7253 ...completed
loading data 4970/7253 ...completed
loading data 4971/7253 ...completed
loading data 4972/7253 ...completed
loading data 4973/7253 ...completed
loading data 4974/7253 ...completed
loading data 4975/7253 ...completed
loading data 4976/7253 ...completed
loading data 4977/7253 ...completed
loading data 4978/7253 ...completed
loading data 4979/7253 ...completed
loading data 4980/7253 ...completed
loading data 4981/7253 ...completed
loading data 4982/7253 ...completed
loading data 4983/7253 ...completed
loading data 4984/7253 ...completed
loading data 4985/7253 ...completed
loading data 4986/7253 ...completed
loading data 4987/7253 ...completed
loading data 4988/7253 ...completed
loading data 4989/7253 ...completed
loading data 4990/7253 ...completed
loading data 4991/7253 ...co

loading data 5192/7253 ...completed
loading data 5193/7253 ...completed
loading data 5194/7253 ...completed
loading data 5195/7253 ...completed
loading data 5196/7253 ...completed
loading data 5197/7253 ...completed
loading data 5198/7253 ...completed
loading data 5199/7253 ...completed
loading data 5200/7253 ...completed
loading data 5201/7253 ...completed
loading data 5202/7253 ...completed
loading data 5203/7253 ...completed
loading data 5204/7253 ...completed
loading data 5205/7253 ...completed
loading data 5206/7253 ...completed
loading data 5207/7253 ...completed
loading data 5208/7253 ...completed
loading data 5209/7253 ...completed
loading data 5210/7253 ...completed
loading data 5211/7253 ...completed
loading data 5212/7253 ...completed
loading data 5213/7253 ...completed
loading data 5214/7253 ...completed
loading data 5215/7253 ...completed
loading data 5216/7253 ...completed
loading data 5217/7253 ...completed
loading data 5218/7253 ...completed
loading data 5219/7253 ...co