In [2]:
import statsmodels.api as sm


def smooth(y):
    """Return a smoothed version of ``y``.

    Thin wrapper that delegates to :func:`lowess` with its default span, so
    callers do not depend on which smoother is in use. (A Savitzky-Golay
    call with ``window_size=2001, order=3`` was previously left here as a
    commented-out alternative.)
    """
    smoothed = lowess(y)
    return smoothed

# A span of 0.2 removed it best (chosen empirically; presumably refers to
# the slow baseline component that callers subtract from the signal).
def lowess(y, f=0.2):
    """Fit a LOWESS curve to ``y`` against its sample index.

    Parameters
    ----------
    y : sequence of numbers
        Samples to smooth. The x coordinate of sample ``k`` is ``k``.
    f : float, default 0.2
        Forwarded to statsmodels as ``frac``.

    Returns
    -------
    numpy.ndarray
        Column 1 of the array returned by ``sm.nonparametric.lowess``
        (the fitted values), as a 1-D array.
    """
    sample_index = np.arange(len(y))
    # it=0 is forwarded unchanged (per the statsmodels docs this disables
    # the robustifying re-weighting iterations).
    fitted = sm.nonparametric.lowess(y, sample_index, frac=f, it=0)
    # Column 0 holds x, column 1 the smoothed y values.
    return fitted[:, 1]


import numpy as np
import pandas as pd
import os, pickle, sys
from scipy import signal
#sys.path.append('DL_model')


### Input configuration
SRATE = 100  # sampling rate in Hz (an older comment here said 250Hz; the value in use is 100)
LEN_INPUT = 20  # length of one input window, in seconds (an older comment said 10s; the value in use is 20)
LEN_PER_NRS = 60  # vital length for each NRS
OVERLAP = 2  # multiplied by the window index to get each window's start (in seconds)
n_aug = (LEN_PER_NRS - LEN_INPUT) // OVERLAP + 1  # number of augmented windows per NRS




# Build (or load from cache) the PACU PPG dataset: baseline-removed, zero-mean
# PPG windows paired with integer NRS labels, split into train / validation /
# test by case id.
input_path = '../DL_model/dataset/preprocess4/PPG_100Hz/'
# makedirs instead of mkdir: also creates missing parent directories and does
# not fail when the directory is already there.
os.makedirs(input_path[:-1], exist_ok=True)


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def _load_npz_array(path, allow_pickle=False):
    """Return the ``'arr_0'`` array of the ``.npz`` archive at ``path`` and close the archive."""
    with np.load(path, allow_pickle=allow_pickle) as archive:
        return archive['arr_0']


if os.path.exists(input_path+'x_train_pacu.npz'):
    # NOTE(review): this branch looks for the un-suffixed files, while the
    # branch below saves files with a '2' suffix (and skips the first 3600
    # rows) -- presumably partial results are merged elsewhere; confirm.
    print('loading train...', flush=True, end='')
    x_train_pacu = _load_npz_array(input_path+'x_train_pacu.npz', allow_pickle=True)
    y_train_pacu = _load_npz_array(input_path+'y_train_pacu.npz')
    x_test_pacu = _load_npz_array(input_path+'x_test_pacu.npz', allow_pickle=True)
    y_test_pacu = _load_npz_array(input_path+'y_test_pacu.npz')
    # The summary at the bottom reports the validation set too, so it must be
    # defined in this branch as well (it used to raise NameError here).
    if os.path.exists(input_path+'x_val_pacu.npz') and os.path.exists(input_path+'y_val_pacu.npz'):
        x_val_pacu = _load_npz_array(input_path+'x_val_pacu.npz', allow_pickle=True)
        y_val_pacu = _load_npz_array(input_path+'y_val_pacu.npz')
    else:
        print('(no cached validation set found) ', flush=True, end='')
        x_val_pacu = np.empty(0, np.float32)
        y_val_pacu = np.empty(0, int)
    print('done', flush=True)

else:
    false_row_list_preop = []

    vital_path = '../../cranberry2/Preprocessing/vital_data/PPG_100Hz_ECG_100Hz_pacu_2min/'
    ecg_path = '../../cranberry2/Preprocessing/vital_data/ECG_250Hz_pacu_2min/ECG,'
    df_preprocess_pacu = _load_pickle('../DL_model/cache/preprocess3/input3/df_preprocess_pacu_agender')
    df_demograph = pd.read_csv('../DL_model/caseids_age_gender.csv')

    ### case ids belonging to the test / train / validation splits
    caseid_test = _load_pickle('../DL_model/caseid_test_new')
    caseid_train = _load_pickle('../DL_model/caseid_train_new')
    caseid_val = _load_pickle('../DL_model/caseid_val_new')

    non_lis = []  # case ids that are in none of the three splits
    x_train_pacu, y_train_pacu = [], []
    x_test_pacu, y_test_pacu = [], []
    x_val_pacu, y_val_pacu = [], []
    age_train_pacu, gender_train_pacu = [], []
    age_test_pacu, gender_test_pacu = [], []
    age_val_pacu, gender_val_pacu = [], []

    # Directory receiving one pickle per accepted window; create it up front
    # so the first dump cannot fail on a missing directory.
    window_cache_dir = '../../cranberry2/Preprocessing/cache/lowess_filtered_PPG_100Hz/'
    os.makedirs(window_cache_dir, exist_ok=True)

    SKIP_ROWS = 3600  # rows 1..3600 are not processed by this run
    VITAL_LEN = 12000  # number of PPG samples kept per record

    n_rows = len(df_preprocess_pacu)
    cnt = 0
    for cnt, (_, row) in enumerate(df_preprocess_pacu.iterrows(), start=1):
        if cnt <= SKIP_ROWS:
            continue
        print('loading data {}/{} ...'.format(cnt, n_rows), end='')

        caseid = row['caseids']

        # Not used further in this cell; raises IndexError when the case id
        # has no demographics entry.
        row_demo = df_demograph[df_demograph['caseids']==caseid].iloc[0]

        # vital data - PPG (no resampling is done here)
        df_vital = _load_pickle(vital_path+row['file_path']).reset_index()
        # .ffill()/.bfill() replace the deprecated fillna(method=...) spelling.
        pleth_samp = df_vital[['Pleth']].ffill(axis=0).bfill(axis=0).values.flatten()[:VITAL_LEN]
        # Records shorter than VITAL_LEN are right-padded with NaN.
        ppg_per_NRS = np.full(VITAL_LEN, np.nan)
        ppg_per_NRS[0:len(pleth_samp)] = pleth_samp

        # The split a case belongs to is the same for all of its windows, so
        # pick the destination lists once per row (test first, then
        # validation, then train -- same precedence as before).
        if caseid in caseid_test:
            x_dst, y_dst = x_test_pacu, y_test_pacu
        elif caseid in caseid_val:
            x_dst, y_dst = x_val_pacu, y_val_pacu
        elif caseid in caseid_train:
            x_dst, y_dst = x_train_pacu, y_train_pacu
        else:
            x_dst, y_dst = None, None

        save_path = window_cache_dir+'pacu_'+caseid
        # Check windows 30..50 (21 windows) of this NRS record.
        for i in range(30,51):
            # Column str(i+1) flags whether window i passed preprocessing.
            if not row[str(i+1)]:
                continue

            start_idx = i*OVERLAP*SRATE  # = 200*i with the current constants
            end_idx = (i*OVERLAP+LEN_INPUT)*SRATE  # = 200*i + 2000

            # Copy the window and fill any NaN samples from their neighbours.
            ppg_inp = np.copy(ppg_per_NRS[start_idx:end_idx])
            if np.isnan(ppg_inp).any():
                ppg_inp = pd.DataFrame(ppg_inp).ffill(axis=0).bfill(axis=0).values.flatten()

            # Normalise: subtract the LOWESS trend, then remove the mean.
            pleth_inp = ppg_inp - smooth(ppg_inp)
            pleth_inp = pleth_inp - np.nanmean(pleth_inp)

            # 'with' guarantees the pickle is flushed and closed.
            with open(save_path+'_{}'.format(i), 'wb') as fh:
                pickle.dump(pleth_inp, fh)

            if x_dst is None:
                non_lis.append(caseid)
            else:
                x_dst.append(pleth_inp)
                y_dst.append(int(float(row['NRS'])))

        print('completed')

    x_train_pacu = np.array(x_train_pacu, np.float32)
    x_test_pacu = np.array(x_test_pacu, np.float32)
    y_train_pacu = np.array(y_train_pacu, int)
    y_test_pacu = np.array(y_test_pacu, int)
    x_val_pacu = np.array(x_val_pacu, np.float32)
    y_val_pacu = np.array(y_val_pacu, int)

    # Save the arrays
    print('saving...', end='', flush=True)
    np.savez_compressed(input_path+'x_train_pacu2.npz', x_train_pacu)
    np.savez_compressed(input_path+'x_test_pacu2.npz', x_test_pacu)
    np.savez_compressed(input_path+'x_val_pacu2.npz', x_val_pacu)
    np.savez_compressed(input_path+'y_train_pacu2.npz', y_train_pacu)
    np.savez_compressed(input_path+'y_test_pacu2.npz', y_test_pacu)
    np.savez_compressed(input_path+'y_val_pacu2.npz', y_val_pacu)
    print('done', flush=True)


print('size of training set(pacu):', len(x_train_pacu))
print('size of validation set(pacu):', len(x_val_pacu))
print('size of test set(pacu):', len(x_test_pacu))

loading data 3601/7253 ...completed
loading data 3602/7253 ...completed
loading data 3603/7253 ...completed
loading data 3604/7253 ...completed
loading data 3605/7253 ...completed
loading data 3606/7253 ...completed
loading data 3607/7253 ...completed
loading data 3608/7253 ...completed
loading data 3609/7253 ...completed
loading data 3610/7253 ...completed
loading data 3611/7253 ...completed
loading data 3612/7253 ...completed
loading data 3613/7253 ...completed
loading data 3614/7253 ...completed
loading data 3615/7253 ...completed
loading data 3616/7253 ...completed
loading data 3617/7253 ...completed
loading data 3618/7253 ...completed
loading data 3619/7253 ...completed
loading data 3620/7253 ...completed
loading data 3621/7253 ...completed
loading data 3622/7253 ...completed
loading data 3623/7253 ...completed
loading data 3624/7253 ...completed
loading data 3625/7253 ...completed
loading data 3626/7253 ...completed
loading data 3627/7253 ...completed
loading data 3628/7253 ...co

loading data 3828/7253 ...completed
loading data 3829/7253 ...completed
loading data 3830/7253 ...completed
loading data 3831/7253 ...completed
loading data 3832/7253 ...completed
loading data 3833/7253 ...completed
loading data 3834/7253 ...completed
loading data 3835/7253 ...completed
loading data 3836/7253 ...completed
loading data 3837/7253 ...completed
loading data 3838/7253 ...completed
loading data 3839/7253 ...completed
loading data 3840/7253 ...completed
loading data 3841/7253 ...completed
loading data 3842/7253 ...completed
loading data 3843/7253 ...completed
loading data 3844/7253 ...completed
loading data 3845/7253 ...completed
loading data 3846/7253 ...completed
loading data 3847/7253 ...completed
loading data 3848/7253 ...completed
loading data 3849/7253 ...completed
loading data 3850/7253 ...completed
loading data 3851/7253 ...completed
loading data 3852/7253 ...completed
loading data 3853/7253 ...completed
loading data 3854/7253 ...completed
loading data 3855/7253 ...co

loading data 4056/7253 ...completed
loading data 4057/7253 ...completed
loading data 4058/7253 ...completed
loading data 4059/7253 ...completed
loading data 4060/7253 ...completed
loading data 4061/7253 ...completed
loading data 4062/7253 ...completed
loading data 4063/7253 ...completed
loading data 4064/7253 ...completed
loading data 4065/7253 ...completed
loading data 4066/7253 ...completed
loading data 4067/7253 ...completed
loading data 4068/7253 ...completed
loading data 4069/7253 ...completed
loading data 4070/7253 ...completed
loading data 4071/7253 ...completed
loading data 4072/7253 ...completed
loading data 4073/7253 ...completed
loading data 4074/7253 ...completed
loading data 4075/7253 ...completed
loading data 4076/7253 ...completed
loading data 4077/7253 ...completed
loading data 4078/7253 ...completed
loading data 4079/7253 ...completed
loading data 4080/7253 ...completed
loading data 4081/7253 ...completed
loading data 4082/7253 ...completed
loading data 4083/7253 ...co

loading data 4283/7253 ...completed
loading data 4284/7253 ...completed
loading data 4285/7253 ...completed
loading data 4286/7253 ...completed
loading data 4287/7253 ...completed
loading data 4288/7253 ...completed
loading data 4289/7253 ...completed
loading data 4290/7253 ...completed
loading data 4291/7253 ...completed
loading data 4292/7253 ...completed
loading data 4293/7253 ...completed
loading data 4294/7253 ...completed
loading data 4295/7253 ...completed
loading data 4296/7253 ...completed
loading data 4297/7253 ...completed
loading data 4298/7253 ...completed
loading data 4299/7253 ...completed
loading data 4300/7253 ...completed
loading data 4301/7253 ...completed
loading data 4302/7253 ...completed
loading data 4303/7253 ...completed
loading data 4304/7253 ...completed
loading data 4305/7253 ...completed
loading data 4306/7253 ...completed
loading data 4307/7253 ...completed
loading data 4308/7253 ...completed
loading data 4309/7253 ...completed
loading data 4310/7253 ...co

loading data 4510/7253 ...completed
loading data 4511/7253 ...completed
loading data 4512/7253 ...completed
loading data 4513/7253 ...completed
loading data 4514/7253 ...completed
loading data 4515/7253 ...completed
loading data 4516/7253 ...completed
loading data 4517/7253 ...completed
loading data 4518/7253 ...completed
loading data 4519/7253 ...completed
loading data 4520/7253 ...completed
loading data 4521/7253 ...completed
loading data 4522/7253 ...completed
loading data 4523/7253 ...completed
loading data 4524/7253 ...completed
loading data 4525/7253 ...completed
loading data 4526/7253 ...completed
loading data 4527/7253 ...completed
loading data 4528/7253 ...completed
loading data 4529/7253 ...completed
loading data 4530/7253 ...completed
loading data 4531/7253 ...completed
loading data 4532/7253 ...completed
loading data 4533/7253 ...completed
loading data 4534/7253 ...completed
loading data 4535/7253 ...completed
loading data 4536/7253 ...completed
loading data 4537/7253 ...co

loading data 4737/7253 ...completed
loading data 4738/7253 ...completed
loading data 4739/7253 ...completed
loading data 4740/7253 ...completed
loading data 4741/7253 ...completed
loading data 4742/7253 ...completed
loading data 4743/7253 ...completed
loading data 4744/7253 ...completed
loading data 4745/7253 ...completed
loading data 4746/7253 ...completed
loading data 4747/7253 ...completed
loading data 4748/7253 ...completed
loading data 4749/7253 ...completed
loading data 4750/7253 ...completed
loading data 4751/7253 ...completed
loading data 4752/7253 ...completed
loading data 4753/7253 ...completed
loading data 4754/7253 ...completed
loading data 4755/7253 ...completed
loading data 4756/7253 ...completed
loading data 4757/7253 ...completed
loading data 4758/7253 ...completed
loading data 4759/7253 ...completed
loading data 4760/7253 ...completed
loading data 4761/7253 ...completed
loading data 4762/7253 ...completed
loading data 4763/7253 ...completed
loading data 4764/7253 ...co

loading data 4964/7253 ...completed
loading data 4965/7253 ...completed
loading data 4966/7253 ...completed
loading data 4967/7253 ...completed
loading data 4968/7253 ...completed
loading data 4969/7253 ...completed
loading data 4970/7253 ...completed
loading data 4971/7253 ...completed
loading data 4972/7253 ...completed
loading data 4973/7253 ...completed
loading data 4974/7253 ...completed
loading data 4975/7253 ...completed
loading data 4976/7253 ...completed
loading data 4977/7253 ...completed
loading data 4978/7253 ...completed
loading data 4979/7253 ...completed
loading data 4980/7253 ...completed
loading data 4981/7253 ...completed
loading data 4982/7253 ...completed
loading data 4983/7253 ...completed
loading data 4984/7253 ...completed
loading data 4985/7253 ...completed
loading data 4986/7253 ...completed
loading data 4987/7253 ...completed
loading data 4988/7253 ...completed
loading data 4989/7253 ...completed
loading data 4990/7253 ...completed
loading data 4991/7253 ...co

loading data 5191/7253 ...completed
loading data 5192/7253 ...completed
loading data 5193/7253 ...completed
loading data 5194/7253 ...completed
loading data 5195/7253 ...completed
loading data 5196/7253 ...completed
loading data 5197/7253 ...completed
loading data 5198/7253 ...completed
loading data 5199/7253 ...completed
loading data 5200/7253 ...completed
loading data 5201/7253 ...completed
loading data 5202/7253 ...completed
loading data 5203/7253 ...completed
loading data 5204/7253 ...completed
loading data 5205/7253 ...completed
loading data 5206/7253 ...completed
loading data 5207/7253 ...completed
loading data 5208/7253 ...completed
loading data 5209/7253 ...completed
loading data 5210/7253 ...completed
loading data 5211/7253 ...completed
loading data 5212/7253 ...completed
loading data 5213/7253 ...completed
loading data 5214/7253 ...completed
loading data 5215/7253 ...completed
loading data 5216/7253 ...completed
loading data 5217/7253 ...completed
loading data 5218/7253 ...co

loading data 5418/7253 ...completed
loading data 5419/7253 ...completed
loading data 5420/7253 ...completed
loading data 5421/7253 ...completed
loading data 5422/7253 ...completed
loading data 5423/7253 ...completed
loading data 5424/7253 ...completed
loading data 5425/7253 ...completed
loading data 5426/7253 ...completed
loading data 5427/7253 ...completed
loading data 5428/7253 ...completed
loading data 5429/7253 ...completed
loading data 5430/7253 ...completed
loading data 5431/7253 ...completed
loading data 5432/7253 ...completed
loading data 5433/7253 ...completed
loading data 5434/7253 ...completed
loading data 5435/7253 ...completed
loading data 5436/7253 ...completed
loading data 5437/7253 ...completed
loading data 5438/7253 ...completed
loading data 5439/7253 ...completed
loading data 5440/7253 ...completed
loading data 5441/7253 ...completed
loading data 5442/7253 ...completed
loading data 5443/7253 ...completed
loading data 5444/7253 ...completed
loading data 5445/7253 ...co

loading data 5645/7253 ...completed
loading data 5646/7253 ...completed
loading data 5647/7253 ...completed
loading data 5648/7253 ...completed
loading data 5649/7253 ...completed
loading data 5650/7253 ...completed
loading data 5651/7253 ...completed
loading data 5652/7253 ...completed
loading data 5653/7253 ...completed
loading data 5654/7253 ...completed
loading data 5655/7253 ...completed
loading data 5656/7253 ...completed
loading data 5657/7253 ...completed
loading data 5658/7253 ...completed
loading data 5659/7253 ...completed
loading data 5660/7253 ...completed
loading data 5661/7253 ...completed
loading data 5662/7253 ...completed
loading data 5663/7253 ...completed
loading data 5664/7253 ...completed
loading data 5665/7253 ...completed
loading data 5666/7253 ...completed
loading data 5667/7253 ...completed
loading data 5668/7253 ...completed
loading data 5669/7253 ...completed
loading data 5670/7253 ...completed
loading data 5671/7253 ...completed
loading data 5672/7253 ...co

loading data 5872/7253 ...completed
loading data 5873/7253 ...completed
loading data 5874/7253 ...completed
loading data 5875/7253 ...completed
loading data 5876/7253 ...completed
loading data 5877/7253 ...completed
loading data 5878/7253 ...completed
loading data 5879/7253 ...completed
loading data 5880/7253 ...completed
loading data 5881/7253 ...completed
loading data 5882/7253 ...completed
loading data 5883/7253 ...completed
loading data 5884/7253 ...completed
loading data 5885/7253 ...completed
loading data 5886/7253 ...completed
loading data 5887/7253 ...completed
loading data 5888/7253 ...completed
loading data 5889/7253 ...completed
loading data 5890/7253 ...completed
loading data 5891/7253 ...completed
loading data 5892/7253 ...completed
loading data 5893/7253 ...completed
loading data 5894/7253 ...completed
loading data 5895/7253 ...completed
loading data 5896/7253 ...completed
loading data 5897/7253 ...completed
loading data 5898/7253 ...completed
loading data 5899/7253 ...co

loading data 6099/7253 ...completed
loading data 6100/7253 ...completed
loading data 6101/7253 ...completed
loading data 6102/7253 ...completed
loading data 6103/7253 ...completed
loading data 6104/7253 ...completed
loading data 6105/7253 ...completed
loading data 6106/7253 ...completed
loading data 6107/7253 ...completed
loading data 6108/7253 ...completed
loading data 6109/7253 ...completed
loading data 6110/7253 ...completed
loading data 6111/7253 ...completed
loading data 6112/7253 ...completed
loading data 6113/7253 ...completed
loading data 6114/7253 ...completed
loading data 6115/7253 ...completed
loading data 6116/7253 ...completed
loading data 6117/7253 ...completed
loading data 6118/7253 ...completed
loading data 6119/7253 ...completed
loading data 6120/7253 ...completed
loading data 6121/7253 ...completed
loading data 6122/7253 ...completed
loading data 6123/7253 ...completed
loading data 6124/7253 ...completed
loading data 6125/7253 ...completed
loading data 6126/7253 ...co

loading data 6327/7253 ...completed
loading data 6328/7253 ...completed
loading data 6329/7253 ...completed
loading data 6330/7253 ...completed
loading data 6331/7253 ...completed
loading data 6332/7253 ...completed
loading data 6333/7253 ...completed
loading data 6334/7253 ...completed
loading data 6335/7253 ...completed
loading data 6336/7253 ...completed
loading data 6337/7253 ...completed
loading data 6338/7253 ...completed
loading data 6339/7253 ...completed
loading data 6340/7253 ...completed
loading data 6341/7253 ...completed
loading data 6342/7253 ...completed
loading data 6343/7253 ...completed
loading data 6344/7253 ...completed
loading data 6345/7253 ...completed
loading data 6346/7253 ...completed
loading data 6347/7253 ...completed
loading data 6348/7253 ...completed
loading data 6349/7253 ...completed
loading data 6350/7253 ...completed
loading data 6351/7253 ...completed
loading data 6352/7253 ...completed
loading data 6353/7253 ...completed
loading data 6354/7253 ...co

loading data 6554/7253 ...completed
loading data 6555/7253 ...completed
loading data 6556/7253 ...completed
loading data 6557/7253 ...completed
loading data 6558/7253 ...completed
loading data 6559/7253 ...completed
loading data 6560/7253 ...completed
loading data 6561/7253 ...completed
loading data 6562/7253 ...completed
loading data 6563/7253 ...completed
loading data 6564/7253 ...completed
loading data 6565/7253 ...completed
loading data 6566/7253 ...completed
loading data 6567/7253 ...completed
loading data 6568/7253 ...completed
loading data 6569/7253 ...completed
loading data 6570/7253 ...completed
loading data 6571/7253 ...completed
loading data 6572/7253 ...completed
loading data 6573/7253 ...completed
loading data 6574/7253 ...completed
loading data 6575/7253 ...completed
loading data 6576/7253 ...completed
loading data 6577/7253 ...completed
loading data 6578/7253 ...completed
loading data 6579/7253 ...completed
loading data 6580/7253 ...completed
loading data 6581/7253 ...co

loading data 6781/7253 ...completed
loading data 6782/7253 ...completed
loading data 6783/7253 ...completed
loading data 6784/7253 ...completed
loading data 6785/7253 ...completed
loading data 6786/7253 ...completed
loading data 6787/7253 ...completed
loading data 6788/7253 ...completed
loading data 6789/7253 ...completed
loading data 6790/7253 ...completed
loading data 6791/7253 ...completed
loading data 6792/7253 ...completed
loading data 6793/7253 ...completed
loading data 6794/7253 ...completed
loading data 6795/7253 ...completed
loading data 6796/7253 ...completed
loading data 6797/7253 ...completed
loading data 6798/7253 ...completed
loading data 6799/7253 ...completed
loading data 6800/7253 ...completed
loading data 6801/7253 ...completed
loading data 6802/7253 ...completed
loading data 6803/7253 ...completed
loading data 6804/7253 ...completed
loading data 6805/7253 ...completed
loading data 6806/7253 ...completed
loading data 6807/7253 ...completed
loading data 6808/7253 ...co

loading data 7008/7253 ...completed
loading data 7009/7253 ...completed
loading data 7010/7253 ...completed
loading data 7011/7253 ...completed
loading data 7012/7253 ...completed
loading data 7013/7253 ...completed
loading data 7014/7253 ...completed
loading data 7015/7253 ...completed
loading data 7016/7253 ...completed
loading data 7017/7253 ...completed
loading data 7018/7253 ...completed
loading data 7019/7253 ...completed
loading data 7020/7253 ...completed
loading data 7021/7253 ...completed
loading data 7022/7253 ...completed
loading data 7023/7253 ...completed
loading data 7024/7253 ...completed
loading data 7025/7253 ...completed
loading data 7026/7253 ...completed
loading data 7027/7253 ...completed
loading data 7028/7253 ...completed
loading data 7029/7253 ...completed
loading data 7030/7253 ...completed
loading data 7031/7253 ...completed
loading data 7032/7253 ...completed
loading data 7033/7253 ...completed
loading data 7034/7253 ...completed
loading data 7035/7253 ...co

loading data 7235/7253 ...completed
loading data 7236/7253 ...completed
loading data 7237/7253 ...completed
loading data 7238/7253 ...completed
loading data 7239/7253 ...completed
loading data 7240/7253 ...completed
loading data 7241/7253 ...completed
loading data 7242/7253 ...completed
loading data 7243/7253 ...completed
loading data 7244/7253 ...completed
loading data 7245/7253 ...completed
loading data 7246/7253 ...completed
loading data 7247/7253 ...completed
loading data 7248/7253 ...completed
loading data 7249/7253 ...completed
loading data 7250/7253 ...completed
loading data 7251/7253 ...completed
loading data 7252/7253 ...completed
loading data 7253/7253 ...completed
saving...done
size of training set(pacu): 32160
size of validation set(pacu): 3500
size of test set(pacu): 3904
