In [24]:
import os
import re
import numpy as np
import pandas as pd
import multiprocessing
from scipy import signal
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import keras
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from model_src.DilatedResNet import DilatedResNet
from model_src.BianResnet import BianResNet
from model_src.LSTM import VanillaLSTM, CNNLSTM, BiLSTM, BiLSTMAttn
from model_src.RespNet import RespNet

# Dataset locations, relative to this notebook.
BIDMC_PATH = '../../DataLake/bidmc_csv'
CAPNO_PATH = '../../DataLake/Capnobase/data/csv'

# Directory read by the cells below. Capnobase is the active dataset (the value
# this cell previously ended with after overwriting the BIDMC path); assign
# DATA_PATH = BIDMC_PATH before running the BIDMC cells.
DATA_PATH = CAPNO_PATH

# Record-name patterns, e.g. 'bidmc_01' and '0009_8min'.
regex_bidmc = re.compile('bidmc_[0-9]+')
regex_capno = re.compile('[0-9]{4}_8min')

2023-09-20 18:32:13.815644: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-20 18:32:15.947391: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Is GPU Avaliable: []
Is GPU Avaliable: []


2023-09-20 18:33:20.063312: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-09-20 18:33:20.063365: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: 331e9bf49250
2023-09-20 18:33:20.063376: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: 331e9bf49250
2023-09-20 18:33:20.063533: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 525.89.2
2023-09-20 18:33:20.063558: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 525.89.2
2023-09-20 18:33:20.063567: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:309] kernel version seems to match DSO: 525.89.2


### BIDMC

In [8]:
# Collect the unique 'bidmc_<n>' record ids found in DATA_PATH, sorted.
# Directory entries whose names do not match the pattern are skipped instead of
# raising AttributeError on a failed match, and the scandir iterator is closed
# as soon as the listing has been consumed.
with os.scandir(DATA_PATH) as entries:
    bidmc_id = sorted({
        found.group()
        for found in (regex_bidmc.match(entry.name) for entry in entries)
        if found is not None
    })

In [9]:
# Samples per second used for the windowing below.
fs = 125
# One 60-second window, expressed in samples (7500).
window_size = 60 * fs
# Hop between consecutive windows: one sixtieth of a window.
shift = window_size // 60

In [21]:
# Slice every record into overlapping windows: the PLETH samples of a window
# are the model input and the rounded mean RESP value over the same span is the
# label.
# NOTE(review): `names=` relabels columns instead of selecting them by header;
# confirm this matches the layout of the CSV files being read.
pleth = []
resp = []
win_rows = int(window_size/fs)  # RESP rows covered by one window
hop_rows = int(shift/fs)        # RESP rows between consecutive window starts
for sid in bidmc_id:
    pleth_csv = pd.read_csv(f'{DATA_PATH}/{sid}_Signals.csv', names=['Time [s]', ' PLETH'])
    resp_csv = pd.read_csv(f'{DATA_PATH}/{sid}_Numerics.csv', names=['Time [s]', ' RESP'])
    # The window count is derived from the PLETH length and reused for RESP.
    n_windows = int((len(pleth_csv)-window_size)/shift)+1

    pleth_windows = []
    for i in range(n_windows):
        start = shift*i
        pleth_windows.append(pleth_csv[start:window_size+start][' PLETH'].values)
    # Append a trailing axis of length 1 to the stacked windows.
    samp_pleth = np.expand_dims(np.array(pleth_windows), axis=-1)

    resp_means = []
    for i in range(n_windows):
        start = hop_rows*i
        resp_means.append(round(np.mean(resp_csv[start:win_rows+start][' RESP'])))
    samp_resp = np.array(resp_means).reshape(-1,1)

    pleth.append(samp_pleth)
    resp.append(samp_resp)

# Merge the per-record stacks into one array each.
pleth = np.concatenate(pleth, axis=0)
resp = np.concatenate(resp, axis=0)

### Capnobase

## Functions

In [None]:
def interpolation(x, input):
    """Linearly interpolate ``input`` at the fractional index ``x``.

    Args:
        x: Position to sample, given as a (possibly fractional) index into
            ``input``.
        input: Indexable sequence of sample values.

    Returns:
        ``input[x]`` when ``x`` is a whole number; otherwise the value on the
        straight line between the two neighbouring samples. Positions between
        the last index and ``len(input)`` return the last sample.

    Raises:
        IndexError: If ``floor(x)`` is not a valid index of ``input``.
    """
    lower = int(np.floor(x))
    # A position past the last sample has no right-hand neighbour; reuse the
    # last sample instead of indexing one element beyond the end.
    upper = min(int(np.ceil(x)), len(input) - 1)
    y_lower = input[lower]
    y_upper = input[upper]
    return (y_upper - y_lower) * (x - lower) + y_lower


def signal_resample(input_signal, org_fs, new_fs, method='interpolation'):
    """Resample a 1-D signal from ``org_fs`` to ``new_fs`` by linear interpolation.

    The output is sampled at the fractional input indices
    ``0, org_fs/new_fs, 2*org_fs/new_fs, ...`` below ``len(input_signal)``.
    Positions that fall after the last input sample take the last sample's
    value.

    Args:
        input_signal: 1-D sequence of samples.
        org_fs: Sampling rate of ``input_signal``.
        new_fs: Desired sampling rate.
        method: Resampling method; only ``'interpolation'`` is supported.

    Returns:
        numpy.ndarray holding the resampled signal.

    Raises:
        ValueError: If ``method`` is not a supported method name.
    """
    if method != 'interpolation':
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(f"unsupported resampling method: {method!r}")

    n_samples = len(input_signal)
    if n_samples == 0:
        # np.interp rejects empty sample points; there is nothing to resample.
        return np.asarray([])

    new_x = np.arange(0, n_samples, org_fs/new_fs)
    # Vectorised linear interpolation over the integer sample positions.
    return np.interp(new_x, np.arange(n_samples), input_signal)


def generate_dataset(arg_pleths, arg_resps, fs=125, shift_factor=4):
    """Cut recordings into overlapping 60-second PPG windows labelled with breath counts.

    Original author's note: because of the characteristics of the data
    collected at St. Mary's Hospital, this preprocessing has to be applied.

    Args:
        arg_pleths: Sequence of PPG signals, one per recording.
        arg_resps: Sequence of DataFrames, one per recording, each with an
            ``offset`` column of breath timestamps. Offsets are divided by 1000
            to obtain seconds, so they are presumably milliseconds.
        fs: Sampling rate of the PPG signals, in Hz.
        shift_factor: Windows start every ``window / shift_factor`` samples.

    Returns:
        List of ``[ppg_window, breath_count]`` pairs. ``ppg_window`` holds
        ``fs * 60`` samples and ``breath_count`` is the number of offsets that
        fall inside the same time span. Recordings shorter than one window
        contribute nothing. The inputs are not modified.

    Raises:
        ValueError: If the hop between windows rounds down to zero samples.
    """
    import copy

    window_size = int(fs * 60)
    shift = int(window_size/shift_factor)
    if shift <= 0:
        raise ValueError('shift_factor is too large: the window hop is 0 samples')
    window_sec = window_size / fs

    dataset = []
    for j in range(len(arg_pleths)):
        offsets = arg_resps[j]['offset']
        # Breath times in seconds relative to the first breath, kept in a new
        # Series so the caller's frame is left untouched.
        breath_sec = (offsets - offsets.min()) / 1000

        # Keep only the samples up to the (rounded-up) time of the last breath.
        size_lim = int(fs * np.ceil(breath_sec.max()))
        ppg = copy.deepcopy(arg_pleths[j][:size_lim])

        # Too short for a single full window: skip rather than emit a
        # truncated window (int() truncation used to yield one here).
        if len(ppg) < window_size:
            continue
        n_windows = (len(ppg) - window_size) // shift + 1

        for i in range(n_windows):
            start = shift * i
            # Use the exact start time of the PPG window so the breath count
            # stays aligned even when the hop is not a whole number of seconds.
            start_sec = start / fs
            in_window = (breath_sec >= start_sec) & (breath_sec < start_sec + window_sec)
            dataset.append([ppg[start:start + window_size], int(in_window.sum())])

    return dataset


def preprocessing(targets, numtaps, cutoff, shift_factor, org_fs, new_fs, processes):
    """Load, filter, window and resample the PPG recordings listed in ``targets``.

    Steps: read ``pleth.csv`` and ``respirationTimeline.csv`` for every unique
    id under ``DATA_PATH``, apply a zero-phase FIR filter (Hamming window,
    ``pass_zero=False``), cut the filtered signals into windows with
    ``generate_dataset`` and resample every window from ``org_fs`` to
    ``new_fs``.

    Args:
        targets: Object whose ``id`` attribute provides ``unique()`` record ids
            (presumably a DataFrame with an ``id`` column; confirm with caller).
        numtaps: Number of FIR filter taps.
        cutoff: Cutoff frequency/frequencies passed to ``scipy.signal.firwin``.
        shift_factor: Window overlap factor forwarded to ``generate_dataset``.
        org_fs: Sampling rate of the stored recordings.
        new_fs: Sampling rate of the returned windows.
        processes: Number of worker processes for filtering and resampling.

    Returns:
        List of ``[resampled_ppg_window, breath_count]`` pairs.
    """
    print('Extract PLETH/RESP')
    sids = targets.id.unique()
    pleths = [pd.read_csv(f'{DATA_PATH}/{sid}/pleth.csv', header=None, names=['sid', 'offset', 'pleth']).pleth.values for sid in sids]
    resps = [pd.read_csv(f'{DATA_PATH}/{sid}/respirationTimeline.csv', header=None, names=['sid', 'offset']) for sid in sids]

    # Before filtering: flag every recording that contains NaN samples.
    for pleth in pleths:
        if np.isnan(pleth).any():
            print('check')

    # Before filtering: convert the samples to np.float32.
    pleths = [pleth.astype(np.float32) for pleth in pleths]

    print('Init Preprocessing: Filtering')
    taps = signal.firwin(numtaps=numtaps, cutoff=cutoff, window='hamming', pass_zero=False, fs=org_fs)
    # The context manager releases the workers even if a task raises.
    with multiprocessing.Pool(processes=processes) as pool:
        filtered_pleths = pool.starmap(signal.filtfilt, [(taps, 1.0, pleth) for pleth in pleths])

    print('Init Preprocessing: Windowing')
    # Windows are cut before resampling, so they must be sized with the original
    # rate; otherwise the default of 125 Hz is applied whatever org_fs is.
    # generate_dataset slices by sample index, so pass an int when org_fs is a
    # whole number given as a float (e.g. 300.0).
    window_fs = int(org_fs) if org_fs == int(org_fs) else org_fs
    dataset = generate_dataset(filtered_pleths, resps, fs=window_fs, shift_factor=shift_factor)

    print('Init Preprocessing: Resampling')
    with multiprocessing.Pool(processes=processes) as pool:
        resampled = pool.starmap(signal_resample, [(window, org_fs, new_fs) for window, _ in dataset])

    # Pair every resampled window with the breath count of its source window.
    return [[ppg, rr] for ppg, (_, rr) in zip(resampled, dataset)]


def prepare_modeling(dataset=None, batchsize=None):
    """Convert ``(ppg, rr)`` pairs into a batched ``tf.data.Dataset``.

    Args:
        dataset: Iterable of ``(ppg, rr)`` pairs; each ``ppg`` must provide
            ``astype`` (e.g. a numpy array).
        batchsize: Batch size passed to ``Dataset.batch``.

    Returns:
        ``tf.data.Dataset`` of ``(scaled_ppg, rr)`` batches, where every PPG
        window has been min-max scaled on its own and reshaped to a column.
    """
    print('Prepare modeling')
    pairs = list(dataset)
    ppg_stack = np.asarray([ppg.astype(np.float32) for ppg, _ in pairs])
    rr_stack = np.asarray([rr for _, rr in pairs])
    print(ppg_stack.shape, rr_stack.shape)

    # A single scaler object is reused, but fit_transform refits it on every
    # call, so each window is scaled by its own minimum and maximum.
    scaler = MinMaxScaler()
    scaled = []
    for window in ppg_stack:
        scaled.append(scaler.fit_transform(window.reshape(-1, 1)))
    scaled = np.asarray(scaled)
    print(scaled.shape, type(scaled[0][0][0]))

    return tf.data.Dataset.from_tensor_slices((scaled, rr_stack)).batch(batchsize)