In [4]:
import os, sys
import pickle
import pandas as pd
import numpy as np

# Helper used to resample every peak-to-peak wave to the same length.
def linear_connection(list, idx):
    """Linearly interpolate a sequence at a fractional position.

    Args:
        list: Indexable sequence of numbers (the name shadows the builtin but
            is kept so existing callers keep working).
        idx: Position to sample; may be fractional. The integer part selects
            the left neighbour, the fractional part the blend towards the
            right neighbour.

    Returns:
        ``list[i] + (list[i+1] - list[i]) * (idx - i)`` with ``i = int(idx)``.
        When ``idx`` lands exactly on the last element there is no right
        neighbour, so that element is returned as-is.

    Raises:
        IndexError: If ``idx`` points past the end of the sequence.
    """
    int_idx = int(idx)
    frac = idx - int_idx

    # Exactly on the final sample: nothing to interpolate towards. Without
    # this guard the lookup of list[int_idx + 1] raises IndexError.
    if frac == 0 and int_idx + 1 >= len(list):
        return list[int_idx]

    return list[int_idx] + (list[int_idx + 1] - list[int_idx]) * frac



### Input settings
LEN_INPUT = 10     # length of one input window: 10 s
LEN_PER_NRS = 120  # length of vital data available for each NRS
OVERLAP = 5        # multiplied by the window number to get each window's start (seconds)
# Number of windows cut from one recording (data augmentation count).
n_aug = (LEN_PER_NRS - LEN_INPUT) // OVERLAP + 1


# Location of the stored vital data
preprocess_path = '../../cranberry2/Preprocessing/'
vital_path = preprocess_path + 'NRS_vital_pickle_unzip/NRS_vital_pickle'
f_vital_list = os.listdir(vital_path)


# DataFrame that collects the preprocessing information:
# one 'file_path' column followed by one column per window ('1' .. str(n_aug)).
column_list = ['file_path']
column_list.extend(str(window_no) for window_no in range(1, n_aug + 1))  # + ['NRS']
df_preprocess = pd.DataFrame(columns=column_list)


# SRATE: samples per second (0.4 s..2 s beat intervals map to 40..200 samples below).
# f_num: 1-based counter of the file currently being processed.
SRATE, f_num = 100, 0


def _load_pickle(path):
    """Unpickle ``path`` and close the file handle afterwards."""
    # NOTE(review): pickle can execute arbitrary code - only load trusted cache files.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and close the file handle afterwards."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def _interval_noise_ratio(seg, idx_peak, len_wave=200):
    """Return the fraction of peak-to-peak intervals of ``seg`` flagged as noise.

    An interval is flagged when either
      * its length is not strictly between 40 and 200 samples
        (HR 30~150 -> beat interval 0.4 s ~ 2 s, times SRATE), or
      * after resampling to ``len_wave`` points, its correlation with the
        mean wave of the segment is not at least 0.9.

    Args:
        seg: Samples of one window (NaN marks missing values).
        idx_peak: Peak positions inside ``seg``, as integer sample indices;
            must hold at least two peaks.
        len_wave: Number of points every interval is resampled to.
    """
    gaps = np.diff(idx_peak)

    # 2.1 implausible peak spacing
    is_noise = [not 40 < gap < 200 for gap in gaps]

    # 2.2 implausible wave shape: bring every interval to the same length
    waves = []
    for k, gap in enumerate(gaps):
        beat = seg[idx_peak[k]:idx_peak[k + 1] + 1]
        waves.append([linear_connection(beat, n / len_wave * gap) for n in range(len_wave)])

    # Mean wave ignores NaNs; remaining gaps are forward/backward filled.
    # (DataFrame.ffill/bfill replace the deprecated fillna(method=...).)
    mean_wave = np.nanmean(waves, axis=0)
    mean_wave = pd.DataFrame(mean_wave).ffill(axis=0).bfill(axis=0).values.flatten()
    waves = pd.DataFrame(waves).ffill(axis=1).bfill(axis=1).values

    # A flat or all-NaN wave has zero/undefined variance, so corrcoef yields
    # NaN (the source of the "c /= stddev" RuntimeWarnings). NaN must count as
    # noise; a plain "< 0.9" test would let such a wave pass.
    with np.errstate(divide='ignore', invalid='ignore'):
        for k, wave in enumerate(waves):
            if not np.corrcoef(wave, mean_wave)[0, 1] >= 0.9:
                is_noise[k] = True

    return np.sum(is_noise) / len(is_noise)


def _assess_segment(seg_ppg, seg_ecg, idx_ppg_peak, idx_ecg_peak):
    """Judge whether one window is usable as model input.

    Returns:
        ``(bool_pass, nan_info, noise_info)`` where
        ``nan_info = [nan_ppg_perc, nan_ecg_perc, nan_both_perc]`` and
        ``noise_info = [noise_ppg_perc, noise_ecg_perc]``. ``noise_info`` is
        ``[-1, -1]`` when the window was rejected for missing values and
        ``[-2, -2]`` when it was rejected for having too few peaks.
    """
    ### 1. Missing values ###
    # Counted on the NaN-padded window rather than via df.isnull().sum(),
    # because the vital data of an NRS can be shorter than 120 s.
    nan_ppg_list = np.isnan(seg_ppg)
    nan_ecg_list = np.isnan(seg_ecg)
    nan_ppg_perc = np.sum(nan_ppg_list) / LEN_INPUT / SRATE
    nan_ecg_perc = np.sum(nan_ecg_list) / LEN_INPUT / SRATE
    # Share of samples where ECG and PPG are missing at the same time.
    nan_both_perc = np.count_nonzero(nan_ppg_list & nan_ecg_list) / (LEN_INPUT * SRATE)
    nan_info = [nan_ppg_perc, nan_ecg_perc, nan_both_perc]

    # Too many missing values: reject without looking at noise.
    if nan_ppg_perc > 0.3 or nan_ecg_perc > 0.3 or nan_both_perc > 0.2:
        return (False, nan_info, [-1, -1])

    ### 2. Noise ###
    # Fewer than 5 peaks in the window means it is dominated by noise
    # (peaks should cover at least 7 s of the 10 s; with 0.4 s ~ 2 s beat
    # intervals that is at least 3.5 peaks). Reject without the detailed check.
    if len(idx_ppg_peak) <= 4 or len(idx_ecg_peak) <= 4:
        return (False, nan_info, [-2, -2])

    noise_ppg_perc = _interval_noise_ratio(seg_ppg, idx_ppg_peak)
    noise_ecg_perc = _interval_noise_ratio(seg_ecg, idx_ecg_peak)
    noise_info = [noise_ppg_perc, noise_ecg_perc]

    # Is the window good enough to be used as input?
    bool_pass = bool(nan_ppg_perc < 0.3 and nan_ecg_perc < 0.3 and nan_both_perc < 0.2
                     and noise_ppg_perc < 0.3 and noise_ecg_perc < 0.3)
    return (bool_pass, nan_info, noise_info)


for f_num, f_vital in enumerate(f_vital_list[0:2000], start=1):
    print('###Input', f_num,'/ '+str(len(f_vital_list))+': '+f_vital+'###')

    # Cached PPG / ECG peaks; a missing cache file means the recording has
    # no ECG or no PPG data, so the file is skipped.
    ppg_peak_file = preprocess_path+'cache/PPG_peaks/'+f_vital
    ecg_peak_file = preprocess_path+'cache/ECG_peaks/'+f_vital

    if not os.path.exists(ppg_peak_file):
        print('no existing PPG peaks: ', f_vital)
    elif not os.path.exists(ecg_peak_file):
        print('no existing ECG peaks: ', f_vital)
    else:
        # Load the vital data
        df_vital = _load_pickle(vital_path+'/'+f_vital).reset_index()

        # Create a new row in the result DataFrame
        df_preprocess.loc[f_num-1,'file_path'] = f_vital

        _, ppg_peak = _load_pickle(ppg_peak_file)  # PPG minima are not used here
        ecg_peak = _load_pickle(ecg_peak_file)
        ppg_peak = np.asarray(ppg_peak).ravel()
        ecg_peak = np.asarray(ecg_peak).ravel()

        # Cut the recording into 10 s windows
        for i in range(n_aug):
            start_idx = i*OVERLAP*SRATE  # 500i
            end_idx = (i*OVERLAP + LEN_INPUT)*SRATE  # 500i + 1000

            # Window samples, NaN-padded to the full window length when the
            # recording ends early.
            df_vital_input = df_vital.loc[start_idx:end_idx-1]
            n_pad = LEN_INPUT*SRATE - len(df_vital_input)
            seg_ppg = df_vital_input['Pleth'].tolist() + [np.nan] * n_pad
            seg_ecg = df_vital_input['ECG'].tolist() + [np.nan] * n_pad

            # Peak positions that fall inside the window, relative to its start
            idx_ppg_peak = ppg_peak[(start_idx<=ppg_peak) & (ppg_peak<end_idx)] - start_idx
            idx_ecg_peak = ecg_peak[(start_idx<=ecg_peak) & (ecg_peak<end_idx)] - start_idx

            # Store the verdict for this window
            df_preprocess.loc[f_num-1,str(i+1)] = _assess_segment(seg_ppg, seg_ecg, idx_ppg_peak, idx_ecg_peak)

    # Checkpoint every 1000 files - also when the current file was skipped.
    if f_num%1000 == 0:
        print('dumping cache of d_preprocess -', f_num, '/', len(f_vital_list))
        _dump_pickle(df_preprocess, 'cache/preprocess2/input1/df_preprocess_pacu_0-2000')

print('dumping cache of d_preprocess -', f_num, '/', len(f_vital_list))
_dump_pickle(df_preprocess, 'cache/preprocess2/input1/df_preprocess_pacu_0-2000')


###Input 1 / 8318: 3.0,249,PACU1_2_190503_132609.csv###
###Input 2 / 8318: 8.0,1444,PACU1_5_190723_131001.csv###
###Input 3 / 8318: 3.0,5650,PACU1_12_200604_165543.csv###
###Input 4 / 8318: 6.0,5450,PACU1_12_200528_152944.csv###
###Input 5 / 8318: 3.0,7909,PACU1_12_201008_143103.csv###
###Input 6 / 8318: 6.0,2882,PACU1_12_200109_150545.csv###
###Input 7 / 8318: 5.0,8045,PACU1_6_201016_093657.csv###
###Input 8 / 8318: 5.0,4027,PACU1_9_200225_093113.csv###
###Input 9 / 8318: 3.0,7238,PACU1_3_200827_194040.csv###
###Input 10 / 8318: 8.0,2271,PACU1_1_190909_143818.csv###
###Input 11 / 8318: 8.0,1895,PACU1_6_190828_082910.csv###
###Input 12 / 8318: 3.0,8130,PACU1_1_201020_220244.csv###
###Input 13 / 8318: 7.0,1406,PACU1_8_190722_124706.csv###
###Input 14 / 8318: 10.0,1119,PACU1_5_190710_164522.csv###
###Input 15 / 8318: 5.0,746,PACU1_2_190531_091955.csv###
###Input 16 / 8318: 6.0,599,PACU1_7_190524_091407.csv###
###Input 17 / 8318: 4.0,8255,PACU1_7_201027_110406.csv###
###Input 18 / 8318: 8

  c /= stddev[:, None]
  c /= stddev[None, :]


###Input 24 / 8318: 5.0,1330,PACU1_11_190718_145520.csv###
###Input 25 / 8318: 4.0,4785,PACU1_11_200325_125712.csv###
###Input 26 / 8318: 6.0,4395,PACU1_6_200310_121627.csv###
###Input 27 / 8318: 5.0,6936,PACU1_1_200810_122348.csv###
###Input 28 / 8318: 7.0,1281,PACU1_4_190717_172426.csv###
###Input 29 / 8318: 7.0,2364,PACU1_4_190912_000138.csv###
###Input 30 / 8318: 5.0,5145,PACU1_1_200514_144525.csv###
###Input 31 / 8318: 8.0,2927,PACU1_10_200110_130059.csv###
###Input 32 / 8318: 8.0,350,PACU1_3_190509_211631.csv###
###Input 33 / 8318: 9.0,1283,PACU1_5_190717_190617.csv###
###Input 34 / 8318: 3.0,3797,PACU1_2_200218_093808.csv###
###Input 35 / 8318: 5.0,3230,PACU1_6_200122_122323.csv###
###Input 36 / 8318: 5.0,1154,PACU1_2_190711_085132.csv###
###Input 37 / 8318: 9.0,7289,PACU1_2_200909_092156.csv###
###Input 38 / 8318: 3.0,3758,PACU1_2_200215_014656.csv###
###Input 39 / 8318: 6.0,1115,PACU1_2_190710_171004.csv###
###Input 40 / 8318: 6.0,209,PACU1_9_190502_142127.csv###
###Input 41 /

###Input 165 / 8318: 7.0,4229,PACU1_10_200303_162448.csv###
###Input 166 / 8318: 5.0,924,PACU1_3_190703_171251.csv###
###Input 167 / 8318: 5.0,6620,PACU1_6_200722_083316.csv###
###Input 168 / 8318: 3.0,7246,PACU1_1_200828_172552.csv###
###Input 169 / 8318: 8.0,2132,PACU1_7_190904_172040.csv###
###Input 170 / 8318: 3.0,874,PACU1_5_190612_163931.csv###
###Input 171 / 8318: 5.0,1512,PACU1_8_190816_152119.csv###
###Input 172 / 8318: 8.0,7221,PACU1_6_200825_095049.csv###
###Input 173 / 8318: 5.0,596,PACU1_4_190523_200356.csv###
###Input 174 / 8318: 4.0,7930,PACU1_1_201008_183613.csv###
###Input 175 / 8318: 5.0,5390,PACU1_2_200526_234216.csv###
###Input 176 / 8318: 5.0,3837,PACU1_11_200219_095754.csv###
###Input 177 / 8318: 8.0,2189,PACU1_7_190905_182232.csv###
###Input 178 / 8318: 5.0,3879,PACU1_8_200219_164737.csv###
###Input 179 / 8318: 5.0,1611,PACU1_2_190820_135649.csv###
###Input 180 / 8318: 7.0,6072,PACU1_12_200623_152304.csv###
###Input 181 / 8318: 4.0,1373,PACU1_7_190719_133554.csv#

###Input 304 / 8318: 6.0,3573,PACU1_11_200207_142904.csv###
###Input 305 / 8318: 5.0,7511,PACU1_3_200918_005602.csv###
###Input 306 / 8318: 5.0,1207,PACU1_3_190715_150624.csv###
###Input 307 / 8318: 5.0,6856,PACU1_7_200804_134606.csv###
###Input 308 / 8318: 6.0,3274,PACU1_3_200123_120538.csv###
###Input 309 / 8318: 5.0,4098,PACU1_7_200228_121855.csv###
###Input 310 / 8318: 3.0,8073,PACU1_4_201016_174655.csv###
###Input 311 / 8318: 5.0,6837,PACU1_12_200803_143028.csv###
###Input 312 / 8318: 7.0,3029,PACU1_2_200115_141012.csv###
###Input 313 / 8318: 4.0,3767,PACU1_5_200217_113708.csv###
###Input 314 / 8318: 4.0,2645,PACU1_5_191219_165854.csv###
###Input 315 / 8318: 6.0,385,PACU1_5_190513_153956.csv###
###Input 316 / 8318: 8.0,7245,PACU1_1_200828_144451.csv###
###Input 317 / 8318: 3.0,5626,PACU1_2_200604_090759.csv###
###Input 318 / 8318: 3.0,6634,PACU1_4_200722_125624.csv###
###Input 319 / 8318: 5.0,3271,PACU1_3_200123_120538.csv###
###Input 320 / 8318: 5.0,991,PACU1_11_190705_131344.csv

###Input 442 / 8318: 8.0,882,PACU1_1_190617_121710.csv###
###Input 443 / 8318: 7.0,2907,PACU1_4_200110_000846.csv###
###Input 444 / 8318: 5.0,3602,PACU1_5_200210_104932.csv###
###Input 445 / 8318: 5.0,5511,PACU1_3_200529_231452.csv###
###Input 446 / 8318: 3.0,7634,PACU1_6_200923_160908.csv###
###Input 447 / 8318: 6.0,1245,PACU1_1_190716_202834.csv###
###Input 448 / 8318: 3.0,1558,PACU1_10_190819_135024.csv###
###Input 449 / 8318: 8.0,2894,PACU1_2_200109_175440.csv###
###Input 450 / 8318: 6.0,3048,PACU1_9_200115_175359.csv###
###Input 451 / 8318: 5.0,3328,PACU1_3_200129_142528.csv###
###Input 452 / 8318: 7.0,5493,PACU1_11_200529_143500.csv###
###Input 453 / 8318: 6.0,1936,PACU1_5_190829_100712.csv###
###Input 454 / 8318: 6.0,3717,PACU1_2_200214_030259.csv###
###Input 455 / 8318: 4.0,319,PACU1_11_190509_095133.csv###
###Input 456 / 8318: 5.0,5126,PACU1_3_200513_213632.csv###
###Input 457 / 8318: 2.0,8220,PACU1_3_201024_122437.csv###
###Input 458 / 8318: 4.0,6962,PACU1_4_200811_115934.csv

###Input 581 / 8318: 5.0,752,PACU1_5_190531_101851.csv###
###Input 582 / 8318: 8.0,828,PACU1_3_190611_105333.csv###
###Input 583 / 8318: 2.0,2264,PACU1_10_190909_125533.csv###
###Input 584 / 8318: 2.0,4424,PACU1_3_200311_093245.csv###
###Input 585 / 8318: 5.0,5588,PACU1_9_200603_101036.csv###
###Input 586 / 8318: 2.0,1833,PACU1_5_190823_162702.csv###
###Input 587 / 8318: 6.0,6362,PACU1_3_200708_184828.csv###
###Input 588 / 8318: 3.0,1674,PACU1_3_190820_191402.csv###
###Input 589 / 8318: 10.0,6918,PACU1_6_200806_163447.csv###
###Input 590 / 8318: 8.0,2705,PACU1_8_200102_125258.csv###
###Input 591 / 8318: 8.0,3832,PACU1_6_200219_090110.csv###
###Input 592 / 8318: 5.0,3272,PACU1_5_200123_115000.csv###
###Input 593 / 8318: 4.0,6356,PACU1_7_200708_160355.csv###
###Input 594 / 8318: 4.0,4851,PACU1_8_200327_132531.csv###
###Input 595 / 8318: 5.0,8056,PACU1_9_201016_123439.csv###
###Input 596 / 8318: 3.0,931,PACU1_2_190703_215330.csv###
###Input 597 / 8318: 5.0,1647,PACU1_8_190820_163302.csv##

###Input 718 / 8318: 8.0,7422,PACU1_9_200916_092132.csv###
###Input 719 / 8318: 8.0,4494,PACU1_9_200313_135235.csv###
###Input 720 / 8318: 4.0,1549,PACU1_1_190816_184851.csv###
###Input 721 / 8318: 4.0,5252,PACU1_11_200519_170333.csv###
###Input 722 / 8318: 7.0,1790,PACU1_11_190822_145331.csv###
###Input 723 / 8318: 6.0,1463,PACU1_3_190723_195127.csv###
###Input 724 / 8318: 7.0,4234,PACU1_5_200303_195155.csv###
###Input 725 / 8318: 3.0,1570,PACU1_3_190819_205959.csv###
###Input 726 / 8318: 3.0,5436,PACU1_3_200528_113953.csv###
###Input 727 / 8318: 8.0,6034,PACU1_11_200619_153347.csv###
###Input 728 / 8318: 2.0,1521,PACU1_1_190816_154204.csv###
###Input 729 / 8318: 2.0,6741,PACU1_11_200728_132208.csv###
###Input 730 / 8318: 5.0,1088,PACU1_9_190710_093206.csv###
###Input 731 / 8318: 5.0,2179,PACU1_4_190905_150215.csv###
###Input 732 / 8318: 8.0,3139,PACU1_9_200120_121646.csv###
###Input 733 / 8318: 6.0,988,PACU1_11_190705_131344.csv###
###Input 734 / 8318: 7.0,4501,PACU1_7_200313_151802.

###Input 857 / 8318: 4.0,1052,PACU1_4_190709_112306.csv###
###Input 858 / 8318: 3.0,2829,PACU1_7_200108_090440.csv###
###Input 859 / 8318: 5.0,765,PACU1_5_190531_144225.csv###
###Input 860 / 8318: 3.0,527,PACU1_5_190516_174457.csv###
###Input 861 / 8318: 5.0,4533,PACU1_7_200316_154805.csv###
###Input 862 / 8318: 5.0,2554,PACU1_2_191113_114438.csv###
###Input 863 / 8318: 7.0,6361,PACU1_3_200708_184828.csv###
###Input 864 / 8318: 5.0,1588,PACU1_6_190820_105347.csv###
###Input 865 / 8318: 7.0,8123,PACU1_5_201020_171155.csv###
###Input 866 / 8318: 4.0,1390,PACU1_3_190719_154655.csv###
###Input 867 / 8318: 3.0,7206,PACU1_3_200821_223504.csv###
###Input 868 / 8318: 5.0,8081,PACU1_3_201019_094609.csv###
###Input 869 / 8318: 4.0,5767,PACU1_12_200610_094140.csv###
###Input 870 / 8318: 4.0,5195,PACU1_6_200515_165154.csv###
###Input 871 / 8318: 2.0,4947,PACU1_1_200506_135811.csv###
###Input 872 / 8318: 3.0,8269,PACU1_5_201027_151403.csv###
###Input 873 / 8318: 8.0,2479,PACU1_3_191021_093930.csv##

###Input 995 / 8318: 4.0,5615,PACU1_10_200603_164857.csv###
###Input 996 / 8318: 8.0,6348,PACU1_5_200708_142001.csv###
###Input 997 / 8318: 5.0,7715,PACU1_8_200925_162959.csv###
###Input 998 / 8318: 6.0,1159,PACU1_6_190712_083200.csv###
###Input 999 / 8318: 7.0,643,PACU1_4_190527_165508.csv###
###Input 1000 / 8318: 5.0,1853,PACU1_1_190827_103157.csv###
dumping cache of d_preprocess - 1000 / 3888
###Input 1001 / 8318: 6.0,5017,PACU1_5_200508_130334.csv###
###Input 1002 / 8318: 6.0,5530,PACU1_5_200601_142745.csv###
###Input 1003 / 8318: 8.0,2587,PACU1_9_191128_144712.csv###
###Input 1004 / 8318: 2.0,5979,PACU1_8_200617_155220.csv###
###Input 1005 / 8318: 3.0,4794,PACU1_3_200325_152610.csv###
###Input 1006 / 8318: 3.0,3283,PACU1_3_200123_162554.csv###
###Input 1007 / 8318: 9.0,3747,PACU1_1_200214_181636.csv###
###Input 1008 / 8318: 6.0,3135,PACU1_3_200120_100745.csv###
###Input 1009 / 8318: 8.0,4799,PACU1_9_200325_171903.csv###
###Input 1010 / 8318: 7.0,5407,PACU1_9_200527_133938.csv###
#

###Input 1131 / 8318: 7.0,612,PACU1_7_190524_125100.csv###
no existing PPG peaks:  7.0,612,PACU1_7_190524_125100.csv
###Input 1132 / 8318: 6.0,1422,PACU1_3_190723_052629.csv###
###Input 1133 / 8318: 7.0,3186,PACU1_6_200121_170016.csv###
###Input 1134 / 8318: 7.0,6015,PACU1_7_200618_171846.csv###
###Input 1135 / 8318: 6.0,2139,PACU1_4_190904_213656.csv###
###Input 1136 / 8318: 4.0,3443,PACU1_8_200204_114900.csv###
###Input 1137 / 8318: 4.0,3604,PACU1_3_200210_112555.csv###
###Input 1138 / 8318: 7.0,648,PACU1_3_190528_000714.csv###
###Input 1139 / 8318: 5.0,6937,PACU1_6_200810_142429.csv###
###Input 1140 / 8318: 9.0,4802,PACU1_6_200325_180343.csv###
###Input 1141 / 8318: 7.0,2888,PACU1_1_200109_162237.csv###
###Input 1142 / 8318: 4.0,4205,PACU1_5_200302_200759.csv###
###Input 1143 / 8318: 5.0,7663,PACU1_1_200924_114932.csv###
###Input 1144 / 8318: 5.0,4980,PACU1_6_200507_161903.csv###
###Input 1145 / 8318: 4.0,6782,PACU1_1_200730_115233.csv###
###Input 1146 / 8318: 3.0,577,PACU1_4_190523

###Input 1267 / 8318: 8.0,3053,PACU1_1_200115_235547.csv###
###Input 1268 / 8318: 8.0,546,PACU1_3_190517_120132.csv###
###Input 1269 / 8318: 5.0,586,PACU1_4_190523_164213.csv###
###Input 1270 / 8318: 5.0,3422,PACU1_7_200203_141134.csv###
###Input 1271 / 8318: 7.0,4649,PACU1_5_200318_202045.csv###
###Input 1272 / 8318: 5.0,993,PACU1_11_190705_131344.csv###
###Input 1273 / 8318: 4.0,1715,PACU1_2_190821_115808.csv###
###Input 1274 / 8318: 4.0,5098,PACU1_2_200512_231841.csv###
###Input 1275 / 8318: 5.0,8191,PACU1_5_201022_140109.csv###
###Input 1276 / 8318: 5.0,2158,PACU1_3_190905_113119.csv###
###Input 1277 / 8318: 8.0,6058,PACU1_4_200622_202654.csv###
###Input 1278 / 8318: 5.0,5583,PACU1_1_200603_085151.csv###
###Input 1279 / 8318: 6.0,8050,PACU1_4_201016_121452.csv###
###Input 1280 / 8318: 8.0,3090,PACU1_3_200117_000917.csv###
###Input 1281 / 8318: 3.0,1232,PACU1_5_190716_125505.csv###
###Input 1282 / 8318: 3.0,4478,PACU1_4_200312_204007.csv###
###Input 1283 / 8318: 3.0,3387,PACU1_2_200

###Input 1404 / 8318: 4.0,784,PACU1_6_190603_093744.csv###
###Input 1405 / 8318: 8.0,2344,PACU1_7_190911_131655.csv###
###Input 1406 / 8318: 8.0,3627,PACU1_3_200210_212804.csv###
###Input 1407 / 8318: 3.0,3757,PACU1_2_200215_014656.csv###
###Input 1408 / 8318: 8.0,3089,PACU1_3_200117_000917.csv###
###Input 1409 / 8318: 6.0,444,PACU1_11_190514_165036.csv###
###Input 1410 / 8318: 3.0,1561,PACU1_8_190819_140625.csv###
###Input 1411 / 8318: 7.0,411,PACU1_4_190514_113852.csv###
###Input 1412 / 8318: 4.0,2879,PACU1_3_200109_124545.csv###
###Input 1413 / 8318: 5.0,6256,PACU1_2_200703_092105.csv###
###Input 1414 / 8318: 4.0,5693,PACU1_8_200605_163631.csv###
###Input 1415 / 8318: 4.0,1061,PACU1_7_190709_140636.csv###
###Input 1416 / 8318: 4.0,1184,PACU1_4_190712_164052.csv###
###Input 1417 / 8318: 5.0,2520,PACU1_7_191023_123318.csv###
###Input 1418 / 8318: 8.0,5193,PACU1_10_200515_165624.csv###
###Input 1419 / 8318: 7.0,960,PACU1_12_190704_164600.csv###
###Input 1420 / 8318: 4.0,2878,PACU1_7_20

###Input 1541 / 8318: 4.0,1254,PACU1_5_190717_082658.csv###
###Input 1542 / 8318: 3.0,2696,PACU1_3_200102_100649.csv###
###Input 1543 / 8318: 7.0,4077,PACU1_3_200228_001842.csv###
###Input 1544 / 8318: 7.0,1030,PACU1_5_190708_180445.csv###
###Input 1545 / 8318: 3.0,6187,PACU1_6_200630_142911.csv###
###Input 1546 / 8318: 8.0,1333,PACU1_9_190718_154644.csv###
###Input 1547 / 8318: 3.0,5347,PACU1_1_200525_161225.csv###
###Input 1548 / 8318: 4.0,291,PACU1_3_190505_210427.csv###
###Input 1549 / 8318: 6.0,2322,PACU1_6_190910_173346.csv###
###Input 1550 / 8318: 5.0,748,PACU1_1_190531_092308.csv###
###Input 1551 / 8318: 9.0,4066,PACU1_9_200227_172002.csv###
###Input 1552 / 8318: 3.0,3908,PACU1_4_200220_160451.csv###
###Input 1553 / 8318: 4.0,4923,PACU1_10_200331_165136.csv###
###Input 1554 / 8318: 5.0,5070,PACU1_1_200512_102246.csv###
###Input 1555 / 8318: 6.0,5804,PACU1_7_200611_105257.csv###
###Input 1556 / 8318: 6.0,4220,PACU1_6_200303_123252.csv###
###Input 1557 / 8318: 5.0,855,PACU1_5_190

###Input 1677 / 8318: 2.0,5177,PACU1_1_200515_133304.csv###
###Input 1678 / 8318: 5.0,439,PACU1_3_190514_162201.csv###
###Input 1679 / 8318: 3.0,2401,PACU1_5_190917_164400.csv###
###Input 1680 / 8318: 6.0,2977,PACU1_5_200113_191451.csv###
###Input 1681 / 8318: 6.0,2665,PACU1_1_191223_155705.csv###
###Input 1682 / 8318: 9.0,2597,PACU1_1_191203_215005.csv###
###Input 1683 / 8318: 3.0,234,PACU1_6_190503_090717.csv###
###Input 1684 / 8318: 3.0,2199,PACU1_1_190906_085600.csv###
###Input 1685 / 8318: 6.0,6392,PACU1_3_200710_163122.csv###
###Input 1686 / 8318: 8.0,4678,PACU1_4_200319_193541.csv###
###Input 1687 / 8318: 10.0,688,PACU1_6_190529_082821.csv###
###Input 1688 / 8318: 7.0,2491,PACU1_3_191021_160814.csv###
###Input 1689 / 8318: 9.0,6966,PACU1_2_200811_124816.csv###
###Input 1690 / 8318: 4.0,5010,PACU1_9_200508_114612.csv###
###Input 1691 / 8318: 4.0,2760,PACU1_5_200103_145348.csv###
###Input 1692 / 8318: 5.0,3811,PACU1_3_200218_154242.csv###
###Input 1693 / 8318: 4.0,7762,PACU1_4_200

###Input 1814 / 8318: 5.0,4944,PACU1_6_200504_142621.csv###
###Input 1815 / 8318: 3.0,24,PACU1_2_190410_191639.csv###
###Input 1816 / 8318: 2.0,6455,PACU1_3_200715_094143.csv###
###Input 1817 / 8318: 2.0,1553,PACU1_3_190819_082733.csv###
###Input 1818 / 8318: 5.0,6252,PACU1_5_200702_181420.csv###
###Input 1819 / 8318: 5.0,7111,PACU1_1_200819_145226.csv###
###Input 1820 / 8318: 2.0,3950,PACU1_1_200221_121631.csv###
###Input 1821 / 8318: 5.0,8181,PACU1_7_201022_102105.csv###
###Input 1822 / 8318: 3.0,2955,PACU1_7_200113_094954.csv###
###Input 1823 / 8318: 3.0,5635,PACU1_4_200604_110216.csv###
###Input 1824 / 8318: 4.0,4442,PACU1_8_200311_134715.csv###
###Input 1825 / 8318: 5.0,5277,PACU1_10_200520_122755.csv###
###Input 1826 / 8318: 6.0,7734,PACU1_3_200928_145817.csv###
###Input 1827 / 8318: 9.0,1305,PACU1_7_190718_114824.csv###
###Input 1828 / 8318: 5.0,2326,PACU1_4_190910_180102.csv###
###Input 1829 / 8318: 6.0,777,PACU1_4_190531_192901.csv###
###Input 1830 / 8318: 4.0,7607,PACU1_6_200

###Input 1950 / 8318: 5.0,2403,PACU1_3_190917_173755.csv###
###Input 1951 / 8318: 3.0,1188,PACU1_10_190712_185226.csv###
###Input 1952 / 8318: 5.0,7833,PACU1_2_201006_183602.csv###
###Input 1953 / 8318: 5.0,8367,PACU1_11_201030_143744.csv###
###Input 1954 / 8318: 5.0,1763,PACU1_1_190821_201207.csv###
###Input 1955 / 8318: 6.0,1225,PACU1_3_190716_095311.csv###
###Input 1956 / 8318: 6.0,2789,PACU1_6_200106_171021.csv###
###Input 1957 / 8318: 6.0,4957,PACU1_2_200507_002522.csv###
###Input 1958 / 8318: 6.0,1060,PACU1_1_190709_130101.csv###
###Input 1959 / 8318: 6.0,6172,PACU1_3_200629_194734.csv###
###Input 1960 / 8318: 4.0,8182,PACU1_7_201022_115230.csv###
###Input 1961 / 8318: 7.0,3487,PACU1_8_200205_120101.csv###
###Input 1962 / 8318: 7.0,245,PACU1_4_190503_115752.csv###
###Input 1963 / 8318: 6.0,6129,PACU1_6_200625_163959.csv###
###Input 1964 / 8318: 3.0,1321,PACU1_6_190718_135114.csv###
###Input 1965 / 8318: 4.0,2862,PACU1_1_200109_095159.csv###
###Input 1966 / 8318: 5.0,169,PACU1_10_

In [5]:
# Input settings
LEN_INPUT = 10     # length of one input window: 10 s
LEN_PER_NRS = 120  # length of vital data available for each NRS
OVERLAP = 5        # multiplied by the window number to get each window's start (seconds)
# Number of windows cut from one recording (data augmentation count).
n_aug = (LEN_PER_NRS - LEN_INPUT) // OVERLAP + 1


# Location of the stored vital data
vital_path = '../../cranberry2/Preprocessing/preop_vital/preop'
f_vital_list = os.listdir(vital_path)

# DataFrame that collects the preprocessing information:
# one 'file_path' column followed by one column per window ('1' .. str(n_aug)).
column_list = ['file_path']
column_list.extend(str(window_no) for window_no in range(1, n_aug + 1))  # + ['NRS']
df_preprocess = pd.DataFrame(columns=column_list)


# SRATE: samples per second (0.4 s..2 s beat intervals map to 40..200 samples below).
# f_num: 1-based counter of the file currently being processed; this cell
# handles files 1001..2000 of the listing.
SRATE, f_num = 100, 1000


def _load_pickle(path):
    """Unpickle ``path`` and close the file handle afterwards."""
    # NOTE(review): pickle can execute arbitrary code - only load trusted cache files.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and close the file handle afterwards."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def _interval_noise_ratio(seg, idx_peak, len_wave=200):
    """Return the fraction of peak-to-peak intervals of ``seg`` flagged as noise.

    An interval is flagged when either
      * its length is not strictly between 40 and 200 samples
        (HR 30~150 -> beat interval 0.4 s ~ 2 s, times SRATE), or
      * after resampling to ``len_wave`` points, its correlation with the
        mean wave of the segment is not at least 0.9.

    Args:
        seg: Samples of one window (NaN marks missing values).
        idx_peak: Peak positions inside ``seg``, as integer sample indices;
            must hold at least two peaks.
        len_wave: Number of points every interval is resampled to.
    """
    gaps = np.diff(idx_peak)

    # 2.1 implausible peak spacing
    is_noise = [not 40 < gap < 200 for gap in gaps]

    # 2.2 implausible wave shape: bring every interval to the same length
    waves = []
    for k, gap in enumerate(gaps):
        beat = seg[idx_peak[k]:idx_peak[k + 1] + 1]
        waves.append([linear_connection(beat, n / len_wave * gap) for n in range(len_wave)])

    # Mean wave ignores NaNs; remaining gaps are forward/backward filled.
    # (DataFrame.ffill/bfill replace the deprecated fillna(method=...).)
    mean_wave = np.nanmean(waves, axis=0)
    mean_wave = pd.DataFrame(mean_wave).ffill(axis=0).bfill(axis=0).values.flatten()
    waves = pd.DataFrame(waves).ffill(axis=1).bfill(axis=1).values

    # A flat or all-NaN wave has zero/undefined variance, so corrcoef yields
    # NaN (numpy then emits "invalid value" RuntimeWarnings). NaN must count
    # as noise; a plain "< 0.9" test would let such a wave pass.
    with np.errstate(divide='ignore', invalid='ignore'):
        for k, wave in enumerate(waves):
            if not np.corrcoef(wave, mean_wave)[0, 1] >= 0.9:
                is_noise[k] = True

    return np.sum(is_noise) / len(is_noise)


def _assess_segment(seg_ppg, seg_ecg, idx_ppg_peak, idx_ecg_peak):
    """Judge whether one window is usable as model input.

    Returns:
        ``(bool_pass, nan_info, noise_info)`` where
        ``nan_info = [nan_ppg_perc, nan_ecg_perc, nan_both_perc]`` and
        ``noise_info = [noise_ppg_perc, noise_ecg_perc]``. ``noise_info`` is
        ``[-1, -1]`` when the window was rejected for missing values and
        ``[-2, -2]`` when it was rejected for having too few peaks.
    """
    ### 1. Missing values ###
    # Counted on the NaN-padded window rather than via df.isnull().sum(),
    # because the vital data of an NRS can be shorter than 120 s.
    nan_ppg_list = np.isnan(seg_ppg)
    nan_ecg_list = np.isnan(seg_ecg)
    nan_ppg_perc = np.sum(nan_ppg_list) / LEN_INPUT / SRATE
    nan_ecg_perc = np.sum(nan_ecg_list) / LEN_INPUT / SRATE
    # Share of samples where ECG and PPG are missing at the same time.
    nan_both_perc = np.count_nonzero(nan_ppg_list & nan_ecg_list) / (LEN_INPUT * SRATE)
    nan_info = [nan_ppg_perc, nan_ecg_perc, nan_both_perc]

    # Too many missing values: reject without looking at noise.
    if nan_ppg_perc > 0.3 or nan_ecg_perc > 0.3 or nan_both_perc > 0.2:
        return (False, nan_info, [-1, -1])

    ### 2. Noise ###
    # Fewer than 5 peaks in the window means it is dominated by noise
    # (peaks should cover at least 7 s of the 10 s; with 0.4 s ~ 2 s beat
    # intervals that is at least 3.5 peaks). Reject without the detailed check.
    if len(idx_ppg_peak) <= 4 or len(idx_ecg_peak) <= 4:
        return (False, nan_info, [-2, -2])

    noise_ppg_perc = _interval_noise_ratio(seg_ppg, idx_ppg_peak)
    noise_ecg_perc = _interval_noise_ratio(seg_ecg, idx_ecg_peak)
    noise_info = [noise_ppg_perc, noise_ecg_perc]

    # Is the window good enough to be used as input?
    bool_pass = bool(nan_ppg_perc < 0.3 and nan_ecg_perc < 0.3 and nan_both_perc < 0.2
                     and noise_ppg_perc < 0.3 and noise_ecg_perc < 0.3)
    return (bool_pass, nan_info, noise_info)


for f_num, f_vital in enumerate(f_vital_list[1000:2000], start=1001):
    print('###Input', f_num,'/ '+str(len(f_vital_list))+': '+f_vital+'###')

    # Cached PPG / ECG peaks; a missing cache file means the recording has
    # no ECG or no PPG data, so the file is skipped.
    ppg_peak_file = '../../cranberry2/Preprocessing/cache/PPG_peaks/'+f_vital
    ecg_peak_file = '../../cranberry2/Preprocessing/cache/ECG_peaks/'+f_vital

    if not os.path.exists(ppg_peak_file):
        print('no existing PPG peaks: ', f_vital)
    elif not os.path.exists(ecg_peak_file):
        print('no existing ECG peaks: ', f_vital)
    else:
        # Load the vital data
        df_vital = _load_pickle(vital_path+'/'+f_vital).reset_index()

        # Create a new row in the result DataFrame
        df_preprocess.loc[f_num-1,'file_path'] = f_vital

        _, ppg_peak = _load_pickle(ppg_peak_file)  # PPG minima are not used here
        ecg_peak = _load_pickle(ecg_peak_file)
        ppg_peak = np.asarray(ppg_peak).ravel()
        ecg_peak = np.asarray(ecg_peak).ravel()

        # Cut the recording into 10 s windows
        for i in range(n_aug):
            start_idx = i*OVERLAP*SRATE  # 500i
            end_idx = (i*OVERLAP + LEN_INPUT)*SRATE  # 500i + 1000

            # Window samples, NaN-padded to the full window length when the
            # recording ends early.
            df_vital_input = df_vital.loc[start_idx:end_idx-1]
            n_pad = LEN_INPUT*SRATE - len(df_vital_input)
            seg_ppg = df_vital_input['Pleth'].tolist() + [np.nan] * n_pad
            seg_ecg = df_vital_input['ECG'].tolist() + [np.nan] * n_pad

            # Peak positions that fall inside the window, relative to its start
            idx_ppg_peak = ppg_peak[(start_idx<=ppg_peak) & (ppg_peak<end_idx)] - start_idx
            idx_ecg_peak = ecg_peak[(start_idx<=ecg_peak) & (ecg_peak<end_idx)] - start_idx

            # Store the verdict for this window
            df_preprocess.loc[f_num-1,str(i+1)] = _assess_segment(seg_ppg, seg_ecg, idx_ppg_peak, idx_ecg_peak)

    # Checkpoint every 1000 files - also when the current file was skipped.
    if f_num%1000 == 0:
        print('dumping cache of d_preprocess -', f_num, '/', len(f_vital_list))
        _dump_pickle(df_preprocess, 'cache/preprocess2/input1/df_preprocess_preop_1000-2000')

print('dumping cache of d_preprocess -', f_num, '/', len(f_vital_list))
_dump_pickle(df_preprocess, 'cache/preprocess2/input1/df_preprocess_preop_1000-2000')


###Input 1001 / 3888: 0.0,1894,PACU1_11_190828_091710.csv###
###Input 1002 / 3888: 0.0,2704,PACU1_1_200102_130054.csv###
###Input 1003 / 3888: 0.0,6068,PACU1_5_200623_124014.csv###
###Input 1004 / 3888: 0.0,2390,PACU1_2_190917_121200.csv###
###Input 1005 / 3888: 0.0,6453,PACU1_7_200715_094134.csv###
###Input 1006 / 3888: 0.0,2989,PACU1_3_200114_125126.csv###
###Input 1007 / 3888: 0.0,6360,PACU1_3_200708_184828.csv###
###Input 1008 / 3888: 0.0,619,PACU1_10_190524_154757.csv###
###Input 1009 / 3888: 0.0,6864,PACU1_10_200804_174809.csv###
###Input 1010 / 3888: 0.0,1879,PACU1_8_190827_172707.csv###
###Input 1011 / 3888: 0.0,8173,PACU1_10_201022_103506.csv###
###Input 1012 / 3888: 0.0,4421,PACU1_7_200311_091556.csv###
###Input 1013 / 3888: 0.0,3198,PACU1_5_200121_191449.csv###
###Input 1014 / 3888: 0.0,6229,PACU1_6_200701_175602.csv###
###Input 1015 / 3888: 0.0,1453,PACU1_6_190723_162355.csv###
###Input 1016 / 3888: 0.0,3936,PACU1_9_200221_100530.csv###
###Input 1017 / 3888: 0.0,7259,PACU1_

###Input 1137 / 3888: 0.0,2120,PACU1_10_190904_145834.csv###
###Input 1138 / 3888: 0.0,3381,PACU1_2_200130_220200.csv###
###Input 1139 / 3888: 0.0,8265,PACU1_4_201027_124103.csv###
###Input 1140 / 3888: 0.0,2067,PACU1_10_190903_134424.csv###
###Input 1141 / 3888: 0.0,906,PACU1_6_190703_121905.csv###
###Input 1142 / 3888: 0.0,3110,PACU1_5_200117_143141.csv###
###Input 1143 / 3888: 0.0,5561,PACU1_1_200602_160011.csv###
###Input 1144 / 3888: 0.0,348,PACU1_6_190509_181726.csv###
###Input 1145 / 3888: 0.0,7021,PACU1_10_200813_125054.csv###
###Input 1146 / 3888: 0.0,1945,PACU1_12_190829_124001.csv###
###Input 1147 / 3888: 0.0,1599,PACU1_10_190820_124105.csv###
###Input 1148 / 3888: 0.0,2428,PACU1_6_191017_090815.csv###
###Input 1149 / 3888: 0.0,1083,PACU1_2_190709_224023.csv###
###Input 1150 / 3888: 0.0,5401,PACU1_1_200527_113207.csv###
###Input 1151 / 3888: 0.0,3801,PACU1_5_200218_110158.csv###
###Input 1152 / 3888: 0.0,5578,PACU1_1_200602_222624.csv###
###Input 1153 / 3888: 0.0,5786,PACU1_

###Input 1275 / 3888: 0.0,6379,PACU1_10_200710_124303.csv###
###Input 1276 / 3888: 0.0,3648,PACU1_9_200211_152643.csv###
###Input 1277 / 3888: 0.0,6034,PACU1_11_200619_153347.csv###
###Input 1278 / 3888: 0.0,5899,PACU1_9_200615_171108.csv###
###Input 1279 / 3888: 0.0,6835,PACU1_8_200803_135332.csv###
###Input 1280 / 3888: 0.0,1630,PACU1_4_190820_155329.csv###
###Input 1281 / 3888: 0.0,1537,PACU1_7_190816_172125.csv###
###Input 1282 / 3888: 0.0,5946,PACU1_10_200616_173958.csv###
###Input 1283 / 3888: 0.0,8240,PACU1_4_201026_234119.csv###
###Input 1284 / 3888: 0.0,6589,PACU1_7_200720_125233.csv###
###Input 1285 / 3888: 0.0,8208,PACU1_3_201023_192748.csv###
###Input 1286 / 3888: 0.0,168,PACU1_10_190430_120158.csv###
###Input 1287 / 3888: 0.0,2472,PACU1_1_191018_163240.csv###
###Input 1288 / 3888: 0.0,7947,PACU1_11_201012_135145.csv###
###Input 1289 / 3888: 0.0,6018,PACU1_5_200618_225353.csv###
###Input 1290 / 3888: 0.0,5428,PACU1_2_200527_215853.csv###
###Input 1291 / 3888: 0.0,4457,PACU1

###Input 1412 / 3888: 0.0,4899,PACU1_2_200331_102220.csv###
###Input 1413 / 3888: 0.0,5573,PACU1_5_200602_195947.csv###
###Input 1414 / 3888: 0.0,7346,PACU1_6_200911_112406.csv###
no existing PPG peaks:  0.0,7346,PACU1_6_200911_112406.csv
###Input 1415 / 3888: 0.0,6723,PACU1_10_200727_160240.csv###
###Input 1416 / 3888: 0.0,1168,PACU1_4_190712_105830.csv###
###Input 1417 / 3888: 0.0,1074,PACU1_10_190709_172441.csv###
###Input 1418 / 3888: 0.0,4051,PACU1_2_200225_180030.csv###
###Input 1419 / 3888: 0.0,5717,PACU1_4_200608_160636.csv###
###Input 1420 / 3888: 0.0,713,PACU1_5_190529_185037.csv###
###Input 1421 / 3888: 0.0,963,PACU1_1_190704_160741.csv###
###Input 1422 / 3888: 0.0,6935,PACU1_1_200810_122348.csv###
###Input 1423 / 3888: 0.0,2514,PACU1_7_191023_084609.csv###
###Input 1424 / 3888: 0.0,7848,PACU1_11_201007_102740.csv###
###Input 1425 / 3888: 0.0,1839,PACU1_1_190823_200839.csv###
###Input 1426 / 3888: 0.0,208,PACU1_7_190502_141009.csv###
###Input 1427 / 3888: 0.0,7396,PACU1_6_20

###Input 1549 / 3888: 0.0,7960,PACU1_1_201013_033301.csv###
###Input 1550 / 3888: 0.0,1531,PACU1_2_190816_165812.csv###
###Input 1551 / 3888: 0.0,5540,PACU1_6_200602_093149.csv###
###Input 1552 / 3888: 0.0,1899,PACU1_7_190828_102153.csv###
###Input 1553 / 3888: 0.0,407,PACU1_7_190514_104520.csv###
###Input 1554 / 3888: 0.0,7533,PACU1_8_200918_163328.csv###
###Input 1555 / 3888: 0.0,6885,PACU1_6_200805_154052.csv###
###Input 1556 / 3888: 0.0,6127,PACU1_5_200625_154758.csv###
###Input 1557 / 3888: 0.0,634,PACU1_3_190527_143739.csv###
###Input 1558 / 3888: 0.0,7219,PACU1_3_200824_163114.csv###
###Input 1559 / 3888: 0.0,34,PACU1_1_190412_115835.csv###
###Input 1560 / 3888: 0.0,5304,PACU1_4_200521_181817.csv###
###Input 1561 / 3888: 0.0,5006,PACU1_7_200508_113700.csv###
###Input 1562 / 3888: 0.0,3173,PACU1_5_200121_131155.csv###
###Input 1563 / 3888: 0.0,6204,PACU1_11_200701_094103.csv###
###Input 1564 / 3888: 0.0,4869,PACU1_3_200329_194034.csv###
###Input 1565 / 3888: 0.0,3409,PACU1_1_2001

###Input 1685 / 3888: 0.0,8101,PACU1_3_201019_204425.csv###
###Input 1686 / 3888: 0.0,5926,PACU1_9_200616_130900.csv###
###Input 1687 / 3888: 0.0,5185,PACU1_3_200515_151431.csv###
###Input 1688 / 3888: 0.0,7609,PACU1_1_200923_101852.csv###
###Input 1689 / 3888: 0.0,8239,PACU1_6_201026_173818.csv###
###Input 1690 / 3888: 0.0,8275,PACU1_1_201027_170907.csv###
###Input 1691 / 3888: 0.0,3746,PACU1_4_200214_180535.csv###
###Input 1692 / 3888: 0.0,7582,PACU1_5_200922_113530.csv###
###Input 1693 / 3888: 0.0,8115,PACU1_11_201020_150248.csv###
###Input 1694 / 3888: 0.0,139,PACU1_4_190429_125630.csv###
###Input 1695 / 3888: 0.0,6686,PACU1_7_200724_140045.csv###
###Input 1696 / 3888: 0.0,4032,PACU1_2_200225_114356.csv###
###Input 1697 / 3888: 0.0,7901,PACU1_7_201008_130818.csv###
###Input 1698 / 3888: 0.0,7131,PACU1_3_200819_235609.csv###
###Input 1699 / 3888: 0.0,7489,PACU1_7_200917_125724.csv###
###Input 1700 / 3888: 0.0,5780,PACU1_7_200610_145022.csv###
###Input 1701 / 3888: 0.0,4673,PACU1_11_

###Input 1823 / 3888: 0.0,1003,PACU1_5_190705_170202.csv###
###Input 1824 / 3888: 0.0,7273,PACU1_3_200902_110226.csv###
###Input 1825 / 3888: 0.0,1665,PACU1_1_190820_170051.csv###
###Input 1826 / 3888: 0.0,1709,PACU1_8_190821_110201.csv###
###Input 1827 / 3888: 0.0,525,PACU1_10_190516_171941.csv###
###Input 1828 / 3888: 0.0,4267,PACU1_4_200304_165501.csv###
###Input 1829 / 3888: 0.0,1398,PACU1_4_190720_041113.csv###
###Input 1830 / 3888: 0.0,5890,PACU1_5_200615_151715.csv###
###Input 1831 / 3888: 0.0,4691,PACU1_4_200320_114831.csv###
###Input 1832 / 3888: 0.0,3700,PACU1_1_200213_204423.csv###
###Input 1833 / 3888: 0.0,4175,PACU1_2_200302_160158.csv###
###Input 1834 / 3888: 0.0,1639,PACU1_4_190820_163339.csv###
###Input 1835 / 3888: 0.0,2118,PACU1_6_190904_143103.csv###
###Input 1836 / 3888: 0.0,141,PACU1_12_190429_133559.csv###
###Input 1837 / 3888: 0.0,1039,PACU1_5_190709_092355.csv###
###Input 1838 / 3888: 0.0,543,PACU1_8_190517_115535.csv###
###Input 1839 / 3888: 0.0,4494,PACU1_9_20

###Input 1959 / 3888: 0.0,8297,PACU1_5_201028_091628.csv###
###Input 1960 / 3888: 0.0,5706,PACU1_1_200608_095337.csv###
###Input 1961 / 3888: 0.0,5054,PACU1_1_200511_181056.csv###
###Input 1962 / 3888: 0.0,6141,PACU1_8_200626_105734.csv###
###Input 1963 / 3888: 0.0,6442,PACU1_2_200714_191239.csv###
###Input 1964 / 3888: 0.0,6272,PACU1_10_200706_103337.csv###
###Input 1965 / 3888: 0.0,1547,PACU1_1_190816_184851.csv###
###Input 1966 / 3888: 0.0,3098,PACU1_1_200117_102315.csv###
###Input 1967 / 3888: 0.0,2132,PACU1_7_190904_172040.csv###
###Input 1968 / 3888: 0.0,3768,PACU1_1_200217_115421.csv###
###Input 1969 / 3888: 0.0,3623,PACU1_1_200210_210020.csv###
###Input 1970 / 3888: 0.0,3593,PACU1_10_200207_183722.csv###
###Input 1971 / 3888: 0.0,5293,PACU1_8_200521_103754.csv###
###Input 1972 / 3888: 0.0,3640,PACU1_5_200211_113809.csv###
###Input 1973 / 3888: 0.0,4677,PACU1_4_200319_193541.csv###
###Input 1974 / 3888: 0.0,3233,PACU1_1_200122_134827.csv###
###Input 1975 / 3888: 0.0,1253,PACU1_4