In [1]:
import statsmodels.api as sm


def smooth(y):
    """Return a smoothed version of the 1-D signal ``y``.

    Thin wrapper that delegates to :func:`lowess` with its default
    smoothing fraction, so the smoothing method can be swapped in one place.
    """
    smoothed = lowess(y)
    return smoothed

# Original author's note: a fraction of 0.2 gave the best removal.
def lowess(y, f=0.2):
    """Return the LOWESS fit of ``y`` against its own sample index.

    Args:
        y: 1-D sequence of samples.
        f: value passed as ``frac`` to statsmodels' LOWESS (fraction of the
            data used for each local fit).

    Returns:
        The fitted-value column of the statsmodels result as a 1-D array.
        ``it=0`` disables the robustifying re-weighting iterations.
    """
    sample_idx = np.arange(len(y))
    fitted = sm.nonparametric.lowess(y, sample_idx, frac=f, it=0)
    # Column 0 holds the x values, column 1 the smoothed estimates.
    return fitted[:, 1]

import numpy as np
import pandas as pd
import os, pickle, sys
from scipy import signal
#sys.path.append('DL_model')


### Input configuration
SRATE = 250  # sampling rate in Hz
LEN_INPUT = 20  # length of one input window; multiplied by SRATE to get samples
LEN_PER_NRS = 60  # vital length for each NRS
OVERLAP = 2  # step between consecutive window starts (used as i*OVERLAP*SRATE)
# Number of augmented windows of LEN_INPUT, stepped by OVERLAP, within LEN_PER_NRS.
n_aug = (LEN_PER_NRS - LEN_INPUT) // OVERLAP + 1




# ---------------------------------------------------------------------------
# Build (or load from cache) the PACU PPG/ECG input windows and labels.
#
# If `x_train_pacu.npz` already exists under `input_path`, the arrays are read
# from disk. Otherwise every row of the preprocessing table is turned into up
# to 30 detrended PPG/ECG windows, routed to the test / validation / train
# lists by case ID, and the resulting arrays are written as `*_pacu4.npz`.
# ---------------------------------------------------------------------------


def _load_pickle(path):
    """Unpickle the object stored at ``path`` and close the file afterwards.

    NOTE: pickle must only be used on trusted files; these are local caches.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def _fill_gaps(frame):
    """Forward- then backward-fill NaNs down the rows; return a flat array."""
    return frame.ffill(axis=0).bfill(axis=0).values.flatten()


def _detrend_window(buffer, start, stop):
    """Copy ``buffer[start:stop]``, fill NaN gaps and subtract ``smooth`` of it."""
    window = np.copy(buffer[start:stop])
    if np.isnan(window).any():
        window = _fill_gaps(pd.DataFrame(window))
    return window - smooth(window)


input_path = '../DL_model/dataset/preprocess4/input3/'
# makedirs also creates missing parents and tolerates an existing directory.
os.makedirs(input_path[:-1], exist_ok=True)

if os.path.exists(input_path+'x_train_pacu.npz'):
    print('loading train...', flush=True, end='')
    x_train_pacu = np.load(input_path+'x_train_pacu.npz', allow_pickle=True)['arr_0']
    y_train_pacu = np.load(input_path+'y_train_pacu.npz')['arr_0']
    x_test_pacu = np.load(input_path+'x_test_pacu.npz', allow_pickle=True)['arr_0']
    y_test_pacu = np.load(input_path+'y_test_pacu.npz')['arr_0']
    # The summary at the bottom reads x_val_pacu, so it must be defined on this
    # path too. NOTE(review): assumes the validation cache follows the same
    # naming scheme as the train/test files -- confirm.
    if os.path.exists(input_path+'x_val_pacu.npz'):
        x_val_pacu = np.load(input_path+'x_val_pacu.npz', allow_pickle=True)['arr_0']
        y_val_pacu = np.load(input_path+'y_val_pacu.npz')['arr_0']
    else:
        print('(no cached validation set found) ', flush=True, end='')
        x_val_pacu = np.array([], np.float32)
        y_val_pacu = np.array([], int)
    print('done', flush=True)

else:
    false_row_list_preop = []

    vital_path = '../../cranberry2/Preprocessing/vital_data/PPG_100Hz_ECG_100Hz_pacu_2min/'
    ecg_path = '../../cranberry2/Preprocessing/vital_data/ECG_250Hz_pacu_2min/ECG,'
    df_preprocess_pacu = _load_pickle('../DL_model/cache/preprocess3/input3/df_preprocess_pacu_agender')
    df_demograph = pd.read_csv('../DL_model/caseids_age_gender.csv')

    # Case IDs belonging to each split.
    caseid_test = _load_pickle('../DL_model/caseid_test_new')
    caseid_train = _load_pickle('../DL_model/caseid_train_new')
    caseid_val = _load_pickle('../DL_model/caseid_val_new')

    non_lis = []
    x_train_pacu, y_train_pacu = [], []
    x_test_pacu, y_test_pacu = [], []
    x_val_pacu, y_val_pacu = [], []
    age_train_pacu, gender_train_pacu = [], []
    age_test_pacu, gender_test_pacu = [], []
    age_val_pacu, gender_val_pacu = [], []

    # Checked in this order: test, then validation, then train.
    split_table = (
        (caseid_test, (x_test_pacu, y_test_pacu, age_test_pacu, gender_test_pacu)),
        (caseid_val, (x_val_pacu, y_val_pacu, age_val_pacu, gender_val_pacu)),
        (caseid_train, (x_train_pacu, y_train_pacu, age_train_pacu, gender_train_pacu)),
    )

    # Samples in one record buffer: 120 s at SRATE (30000 when SRATE is 250).
    n_samples = 120 * SRATE

    # Per-window dumps are written here; make sure the directory exists.
    save_prefix = '../../cranberry2/Preprocessing/cache/lowess_filtered/pacu_'
    os.makedirs(os.path.dirname(save_prefix), exist_ok=True)

    # Rows 1..n_skip are not processed by this run.
    # NOTE(review): presumably they were handled by earlier partial runs (the
    # '4' suffix on the output files suggests a multi-part job) -- confirm.
    n_skip = 5400

    cnt = 0
    for cnt, (_, row) in enumerate(df_preprocess_pacu.iterrows(), start=1):
        if cnt <= n_skip:
            continue

        print('loading data {}/{} ...'.format(cnt, len(df_preprocess_pacu)), end='')
        row_demo = df_demograph[df_demograph['caseids']==row['caseids']].iloc[0]

        # vital data - PPG, Fourier-resampled to n_samples points
        df_vital = _load_pickle(vital_path+row['file_path']).reset_index()
        pleth_samp = _fill_gaps(df_vital[['Pleth']])
        pleth_resamp = signal.resample(pleth_samp, n_samples)
        ppg_per_NRS = np.full(n_samples, np.nan)
        ppg_per_NRS[0:len(pleth_resamp)] = pleth_resamp

        # vital data - ECG, truncated to n_samples and NaN-padded if shorter
        ecg_samp = _load_pickle(ecg_path+row['file_path'][:-3]+'vital').reset_index()[['ECG']]
        ecg_samp = _fill_gaps(ecg_samp)[0:n_samples]
        ecg_per_NRS = np.full(n_samples, np.nan)
        ecg_per_NRS[0:len(ecg_samp)] = ecg_samp

        save_path = save_prefix+row['caseids']

        # The split depends only on the case ID, so resolve it once per row.
        split_lists = None
        for ids, lists in split_table:
            if row['caseids'] in ids:
                split_lists = lists
                break

        # Check the 30 candidate windows of this record (flag columns '1'..'30').
        for i in range(0, 30):
            # Skip windows that did not pass preprocessing.
            if not row[str(i+1)]:
                continue

            start_idx = i*OVERLAP*SRATE
            end_idx = (i*OVERLAP+LEN_INPUT)*SRATE

            # PPG: remove the smoothed trend, then centre on zero.
            pleth_inp = _detrend_window(ppg_per_NRS, start_idx, end_idx)
            pleth_inp = pleth_inp - np.nanmean(pleth_inp)

            # ECG: remove the smoothed trend, then z-score.
            # NOTE: a constant window has zero std and would yield inf/NaN here.
            ecg_inp = _detrend_window(ecg_per_NRS, start_idx, end_idx)
            ecg_inp = (ecg_inp - np.nanmean(ecg_inp)) / np.nanstd(ecg_inp)

            with open(save_path+'_{}'.format(i), 'wb') as fh:
                pickle.dump([pleth_inp, ecg_inp], fh)

            if split_lists is None:
                # Case ID is in none of the three splits.
                non_lis.append(row['caseids'])
                continue

            xs, ys, ages, genders = split_lists
            ages.append(int(row_demo['age']))
            genders.append(1 if row_demo['gender']=='F' else 0)  # F -> 1, otherwise 0
            xs.append([pleth_inp, ecg_inp])
            ys.append(int(float(row['NRS'])))

        print('completed')

    x_train_pacu = np.array(x_train_pacu, np.float32)
    x_test_pacu = np.array(x_test_pacu, np.float32)
    y_train_pacu = np.array(y_train_pacu, int)
    y_test_pacu = np.array(y_test_pacu, int)
    x_val_pacu = np.array(x_val_pacu, np.float32)
    y_val_pacu = np.array(y_val_pacu, int)

    age_train_pacu = np.array(age_train_pacu, int)
    age_test_pacu = np.array(age_test_pacu, int)
    age_val_pacu = np.array(age_val_pacu, int)
    gender_train_pacu = np.array(gender_train_pacu, int)
    gender_test_pacu = np.array(gender_test_pacu, int)
    gender_val_pacu = np.array(gender_val_pacu, int)

    # Save everything.
    print('saving...', end='', flush=True)
    outputs = (
        ('x_train_pacu4.npz', x_train_pacu),
        ('x_test_pacu4.npz', x_test_pacu),
        ('x_val_pacu4.npz', x_val_pacu),
        ('y_train_pacu4.npz', y_train_pacu),
        ('y_test_pacu4.npz', y_test_pacu),
        ('y_val_pacu4.npz', y_val_pacu),
        ('age_train_pacu4.npz', age_train_pacu),
        ('age_test_pacu4.npz', age_test_pacu),
        ('age_val_pacu4.npz', age_val_pacu),
        ('gender_train_pacu4.npz', gender_train_pacu),
        ('gender_test_pacu4.npz', gender_test_pacu),
        ('gender_val_pacu4.npz', gender_val_pacu),
    )
    for file_name, array in outputs:
        np.savez_compressed(input_path+file_name, array)

    print('done', flush=True)


print('size of training set(pacu):', len(x_train_pacu))
print('size of validation set(pacu):', len(x_val_pacu))
print('size of test set(pacu):', len(x_test_pacu))

loading data 5401/7253 ...completed
loading data 5402/7253 ...completed
loading data 5403/7253 ...completed
loading data 5404/7253 ...completed
loading data 5405/7253 ...completed
loading data 5406/7253 ...completed
loading data 5407/7253 ...completed
loading data 5408/7253 ...completed
loading data 5409/7253 ...completed
loading data 5410/7253 ...completed
loading data 5411/7253 ...completed
loading data 5412/7253 ...completed
loading data 5413/7253 ...completed
loading data 5414/7253 ...completed
loading data 5415/7253 ...completed
loading data 5416/7253 ...completed
loading data 5417/7253 ...completed
loading data 5418/7253 ...completed
loading data 5419/7253 ...completed
loading data 5420/7253 ...completed
loading data 5421/7253 ...completed
loading data 5422/7253 ...completed
loading data 5423/7253 ...completed
loading data 5424/7253 ...completed
loading data 5425/7253 ...completed
loading data 5426/7253 ...completed
loading data 5427/7253 ...completed
loading data 5428/7253 ...co

loading data 5628/7253 ...completed
loading data 5629/7253 ...completed
loading data 5630/7253 ...completed
loading data 5631/7253 ...completed
loading data 5632/7253 ...completed
loading data 5633/7253 ...completed
loading data 5634/7253 ...completed
loading data 5635/7253 ...completed
loading data 5636/7253 ...completed
loading data 5637/7253 ...completed
loading data 5638/7253 ...completed
loading data 5639/7253 ...completed
loading data 5640/7253 ...completed
loading data 5641/7253 ...completed
loading data 5642/7253 ...completed
loading data 5643/7253 ...completed
loading data 5644/7253 ...completed
loading data 5645/7253 ...completed
loading data 5646/7253 ...completed
loading data 5647/7253 ...completed
loading data 5648/7253 ...completed
loading data 5649/7253 ...completed
loading data 5650/7253 ...completed
loading data 5651/7253 ...completed
loading data 5652/7253 ...completed
loading data 5653/7253 ...completed
loading data 5654/7253 ...completed
loading data 5655/7253 ...co

loading data 5855/7253 ...completed
loading data 5856/7253 ...completed
loading data 5857/7253 ...completed
loading data 5858/7253 ...completed
loading data 5859/7253 ...completed
loading data 5860/7253 ...completed
loading data 5861/7253 ...completed
loading data 5862/7253 ...completed
loading data 5863/7253 ...completed
loading data 5864/7253 ...completed
loading data 5865/7253 ...completed
loading data 5866/7253 ...completed
loading data 5867/7253 ...completed
loading data 5868/7253 ...completed
loading data 5869/7253 ...completed
loading data 5870/7253 ...completed
loading data 5871/7253 ...completed
loading data 5872/7253 ...completed
loading data 5873/7253 ...completed
loading data 5874/7253 ...completed
loading data 5875/7253 ...completed
loading data 5876/7253 ...completed
loading data 5877/7253 ...completed
loading data 5878/7253 ...completed
loading data 5879/7253 ...completed
loading data 5880/7253 ...completed
loading data 5881/7253 ...completed
loading data 5882/7253 ...co

loading data 6085/7253 ...completed
loading data 6086/7253 ...completed
loading data 6087/7253 ...completed
loading data 6088/7253 ...completed
loading data 6089/7253 ...completed
loading data 6090/7253 ...completed
loading data 6091/7253 ...completed
loading data 6092/7253 ...completed
loading data 6093/7253 ...completed
loading data 6094/7253 ...completed
loading data 6095/7253 ...completed
loading data 6096/7253 ...completed
loading data 6097/7253 ...completed
loading data 6098/7253 ...completed
loading data 6099/7253 ...completed
loading data 6100/7253 ...completed
loading data 6101/7253 ...completed
loading data 6102/7253 ...completed
loading data 6103/7253 ...completed
loading data 6104/7253 ...completed
loading data 6105/7253 ...completed
loading data 6106/7253 ...completed
loading data 6107/7253 ...completed
loading data 6108/7253 ...completed
loading data 6109/7253 ...completed
loading data 6110/7253 ...completed
loading data 6111/7253 ...completed
loading data 6112/7253 ...co

loading data 6312/7253 ...completed
loading data 6313/7253 ...completed
loading data 6314/7253 ...completed
loading data 6315/7253 ...completed
loading data 6316/7253 ...completed
loading data 6317/7253 ...completed
loading data 6318/7253 ...completed
loading data 6319/7253 ...completed
loading data 6320/7253 ...completed
loading data 6321/7253 ...completed
loading data 6322/7253 ...completed
loading data 6323/7253 ...completed
loading data 6324/7253 ...completed
loading data 6325/7253 ...completed
loading data 6326/7253 ...completed
loading data 6327/7253 ...completed
loading data 6328/7253 ...completed
loading data 6329/7253 ...completed
loading data 6330/7253 ...completed
loading data 6331/7253 ...completed
loading data 6332/7253 ...completed
loading data 6333/7253 ...completed
loading data 6334/7253 ...completed
loading data 6335/7253 ...completed
loading data 6336/7253 ...completed
loading data 6337/7253 ...completed
loading data 6338/7253 ...completed
loading data 6339/7253 ...co

loading data 6540/7253 ...completed
loading data 6541/7253 ...completed
loading data 6542/7253 ...completed
loading data 6543/7253 ...completed
loading data 6544/7253 ...completed
loading data 6545/7253 ...completed
loading data 6546/7253 ...completed
loading data 6547/7253 ...completed
loading data 6548/7253 ...completed
loading data 6549/7253 ...completed
loading data 6550/7253 ...completed
loading data 6551/7253 ...completed
loading data 6552/7253 ...completed
loading data 6553/7253 ...completed
loading data 6554/7253 ...completed
loading data 6555/7253 ...completed
loading data 6556/7253 ...completed
loading data 6557/7253 ...completed
loading data 6558/7253 ...completed
loading data 6559/7253 ...completed
loading data 6560/7253 ...completed
loading data 6561/7253 ...completed
loading data 6562/7253 ...completed
loading data 6563/7253 ...completed
loading data 6564/7253 ...completed
loading data 6565/7253 ...completed
loading data 6566/7253 ...completed
loading data 6567/7253 ...co

loading data 6767/7253 ...completed
loading data 6768/7253 ...completed
loading data 6769/7253 ...completed
loading data 6770/7253 ...completed
loading data 6771/7253 ...completed
loading data 6772/7253 ...completed
loading data 6773/7253 ...completed
loading data 6774/7253 ...completed
loading data 6775/7253 ...completed
loading data 6776/7253 ...completed
loading data 6777/7253 ...completed
loading data 6778/7253 ...completed
loading data 6779/7253 ...completed
loading data 6780/7253 ...completed
loading data 6781/7253 ...completed
loading data 6782/7253 ...completed
loading data 6783/7253 ...completed
loading data 6784/7253 ...completed
loading data 6785/7253 ...completed
loading data 6786/7253 ...completed
loading data 6787/7253 ...completed
loading data 6788/7253 ...completed
loading data 6789/7253 ...completed
loading data 6790/7253 ...completed
loading data 6791/7253 ...completed
loading data 6792/7253 ...completed
loading data 6793/7253 ...completed
loading data 6794/7253 ...co

loading data 6994/7253 ...completed
loading data 6995/7253 ...completed
loading data 6996/7253 ...completed
loading data 6997/7253 ...completed
loading data 6998/7253 ...completed
loading data 6999/7253 ...completed
loading data 7000/7253 ...completed
loading data 7001/7253 ...completed
loading data 7002/7253 ...completed
loading data 7003/7253 ...completed
loading data 7004/7253 ...completed
loading data 7005/7253 ...completed
loading data 7006/7253 ...completed
loading data 7007/7253 ...completed
loading data 7008/7253 ...completed
loading data 7009/7253 ...completed
loading data 7010/7253 ...completed
loading data 7011/7253 ...completed
loading data 7012/7253 ...completed
loading data 7013/7253 ...completed
loading data 7014/7253 ...completed
loading data 7015/7253 ...completed
loading data 7016/7253 ...completed
loading data 7017/7253 ...completed
loading data 7018/7253 ...completed
loading data 7019/7253 ...completed
loading data 7020/7253 ...completed
loading data 7021/7253 ...co

loading data 7221/7253 ...completed
loading data 7222/7253 ...completed
loading data 7223/7253 ...completed
loading data 7224/7253 ...completed
loading data 7225/7253 ...completed
loading data 7226/7253 ...completed
loading data 7227/7253 ...completed
loading data 7228/7253 ...completed
loading data 7229/7253 ...completed
loading data 7230/7253 ...completed
loading data 7231/7253 ...completed
loading data 7232/7253 ...completed
loading data 7233/7253 ...completed
loading data 7234/7253 ...completed
loading data 7235/7253 ...completed
loading data 7236/7253 ...completed
loading data 7237/7253 ...completed
loading data 7238/7253 ...completed
loading data 7239/7253 ...completed
loading data 7240/7253 ...completed
loading data 7241/7253 ...completed
loading data 7242/7253 ...completed
loading data 7243/7253 ...completed
loading data 7244/7253 ...completed
loading data 7245/7253 ...completed
loading data 7246/7253 ...completed
loading data 7247/7253 ...completed
loading data 7248/7253 ...co