In [None]:
# output -> RECORDS_<NUM>_1
!python3 valid_signals.py

In [None]:
# output -> RECORDS_<NUM>_2
!python3 valid_length.py

In [None]:
# output -> RECORDS_<NUM>_3
!python3 valid_signals_length.py

In [None]:
# - fix any datatype inconsistencies
# - StandardScaler() (remove mean and scale to unit variance)
# - 4th order butterworth zero-phase filter (0.5 Hz - 8 Hz bandpass)
# ? 4th order, 20 dB cheby filter
# - median filter
# - segment into 5s samples
#
# - for each sample
#
#       find peaks
#       valid if 2 <= n_peaks <= 15 and 2 <= n_valleys <= 15 (as implemented below)
#       valid if abs(n_peaks - n_valleys) <= 2 (as implemented below)
#       
#       find anomalies

In [14]:
import numpy as np
from wfdb import rdrecord
from os.path import exists
from sklearn.preprocessing import StandardScaler
from scipy.signal import butter, cheby2, sosfiltfilt, medfilt
from heartpy.peakdetection import make_windows
from neurokit2.ppg import ppg_findpeaks
from heartpy.preprocessing import flip_signal


class Preprocessor:
    """Load one waveform record and clean / segment its PLETH signal.

    Intended call order: ``filter_pleth()`` -> ``window('pleth')`` ->
    ``filter_pleth_windows()``.  The ABP counterparts are still stubs.
    """

    # path in the form -> physionet.org/files/mimic3wdb/1.0/30/3000063/3000063_0007
    def __init__(self, path, fs=125, seg_len=5):
        """
        Reads the record at `path` and prepares the scaler and filters.

        path    -- record path without extension (see the form above)
        fs      -- sampling frequency in Hz, used for filter design and windowing
        seg_len -- window length in seconds
        """
        self.path = path
        self.fs = fs
        self.seg_len = seg_len

        rec, pleth, abp = self.load()
        self.record = rec
        self.pleth = pleth
        self.abp = abp

        # scaler & filters (both band-pass 0.5 Hz - 8 Hz, 4th order, SOS form)
        self.scaler = StandardScaler()
        self.btr = butter(4, [0.5, 8.0], btype='bandpass', output='sos', fs=self.fs)
        self.cby = cheby2(4, 20, [0.5, 8.0], btype='bandpass', output='sos', fs=self.fs)

        # Filled in by filter_pleth() / window(); None means "not computed yet".
        self.pleth_filtered = None
        self.abp_filtered = None
        self.windows = None

    def load(self):
        """
        Fetches the record's .hea/.dat files with wget when they are not on
        disk, then reads the record.

        Returns (record, pleth, abp), where pleth and abp are the 'PLETH' and
        'ABP' columns of the record's p_signal as float64 arrays.
        Raises ValueError (from list.index) when either signal is absent.
        """
        # NOTE(review): rdrecord below needs the download to be complete, so
        # this relies on Utils.runcmd not returning before wget has finished.
        for ext in ('.hea', '.dat'):
            if not exists(self.path + ext):
                Utils.runcmd('wget -r -np https://' + self.path + ext)

        rec = rdrecord(self.path)
        signals = rec.sig_name
        pleth = rec.p_signal[:, signals.index('PLETH')].astype(np.float64)
        abp = rec.p_signal[:, signals.index('ABP')].astype(np.float64)
        return rec, pleth, abp

    def window(self, sig='pleth'):
        """
        Splits a filtered signal into windows of seg_len seconds.

        sig -- 'pleth' (default) or 'abp'.  filter_abp() is still a stub, so
               'abp' currently always raises RuntimeError.

        Stores the result in self.windows and returns it.
        Raises ValueError for an unknown `sig` and RuntimeError when the
        requested signal has not been filtered yet.
        """
        if sig == 'pleth':
            attr = 'pleth_filtered'
        elif sig == 'abp':
            attr = 'abp_filtered'
        else:
            raise ValueError(f"sig must be 'pleth' or 'abp', got {sig!r}")

        X = getattr(self, attr, None)
        if X is None:
            raise RuntimeError(f"'{attr}' is not set; run filter_{sig}() before window()")

        # make_windows yields (start, stop) index pairs; min_size equal to a
        # full window's sample count is passed so short tails are not kept.
        idx = make_windows(X, sample_rate=self.fs, windowsize=self.seg_len, min_size=(self.seg_len * self.fs))
        self.windows = np.array([np.array(X[i:j]) for i, j in idx])
        return self.windows

    def filter_pleth(self):
        """
        Cleans the raw PLETH signal: NaN -> 0, standardise, Butterworth and
        Chebyshev-II zero-phase band-pass, then a 3-sample median filter.

        The raw signal in self.pleth is left untouched.  Stores the result in
        self.pleth_filtered and returns it.
        """
        # np.where builds a new array, so the NaNs in self.pleth are preserved.
        X = np.where(np.isnan(self.pleth), 0.0, self.pleth)
        X = self.scaler.fit_transform(X.reshape(-1, 1)).reshape(-1)
        X = sosfiltfilt(self.btr, X, padtype=None)
        X = sosfiltfilt(self.cby, X, padtype=None)
        X = medfilt(X, kernel_size=3)
        self.pleth_filtered = X
        return self.pleth_filtered

    def filter_pleth_windows(self, max_amplitude=2, min_count=2, max_count=15, max_imbalance=2):
        """
        Returns the windows in self.windows that pass the peak/valley checks.

        max_amplitude -- reject when the largest peak value, or the smallest
                         value of the flipped signal at a valley, exceeds this
                         in magnitude
        min_count     -- reject when fewer peaks or valleys than this
        max_count     -- reject when more peaks or valleys than this
        max_imbalance -- reject when peak and valley counts differ by more
                         than this

        Raises RuntimeError when window() has not been run.
        """
        windows = getattr(self, 'windows', None)
        if windows is None:
            raise RuntimeError("no windows available; run window() first")

        valid = []
        for X in windows:
            # Peaks detected on the flipped signal are treated as valleys of X.
            X_flip = flip_signal(X)

            peaks = ppg_findpeaks(X, sampling_rate=self.fs)['PPG_Peaks']
            valleys = ppg_findpeaks(X_flip, sampling_rate=self.fs)['PPG_Peaks']
            n_peaks, n_valleys = len(peaks), len(valleys)

            # amplitude outliers
            if n_peaks > 0 and np.abs(np.max(X[peaks])) > max_amplitude:
                continue
            if n_valleys > 0 and np.abs(np.min(X_flip[valleys])) > max_amplitude:
                continue

            # with the defaults and a 5 s window: extrapolated heart rate <24 or >180
            if min(n_peaks, n_valleys) < min_count or max(n_peaks, n_valleys) > max_count:
                continue

            # with the default: 3 or more peaks than valleys or vice versa
            if abs(n_peaks - n_valleys) > max_imbalance:
                continue

            valid.append(X)
        return np.array(valid)

    # Alias so callers using the name remove_bad_windows keep working.
    remove_bad_windows = filter_pleth_windows

    def filter_abp(self):
        """Placeholder: ABP filtering is not implemented; returns None."""
        return

    def filter_abp_windows(self):
        """Placeholder: ABP window screening is not implemented; returns None."""
        return


import subprocess


class Utils:
    """Namespace for small helper routines."""

    @staticmethod
    def runcmd(cmd):
        """
        Runs the provided terminal command and waits for it to finish.

        cmd -- command line as a single string; it is executed through the shell.

        Returns the command's exit code.
        """
        # NOTE: shell=True passes `cmd` to the shell verbatim, so only trusted
        # strings may be given here.
        # subprocess.run blocks until the command exits and drains both pipes;
        # a bare Popen would return immediately and leave callers racing the
        # command (and could stall it once a pipe buffer fills up).
        completed = subprocess.run(cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   text=True,
                                   shell=True)
        return completed.returncode

In [15]:
import numpy as np
from bokeh.plotting import output_notebook, figure, show

output_notebook()

# Record path without extension, laid out the same way as on physionet.org
base_dir = 'physionet.org/files/mimic3wdb/1.0/'
path = base_dir + '30/3000063/3000063_0007'

ps = Preprocessor(path, fs=125, seg_len=5)

# Filter first, then segment the filtered PLETH signal explicitly
pleth_f = ps.filter_pleth()
windows = ps.window('pleth')
windows.shape

(611, 625)

In [16]:
# Drop the windows that fail the peak/valley checks (Preprocessor.filter_pleth_windows)
valid = ps.filter_pleth_windows()
valid.shape

(513, 625)

In [None]:
import numpy as np
import pandas as pd
from utils import hampel

# Run the project's `hampel` helper (from utils) on the first accepted window.
# NOTE(review): presumably yields outlier positions when imputation=False — confirm against utils.hampel.
anomalies = np.array(hampel(pd.Series(valid[0, :]), window_size=5, n=3, imputation=False))
anomalies

In [18]:
# Concatenate every accepted window into one 1-D trace and plot it.
# NOTE(review): the figure title says '5s segment', but `x` holds all accepted
# windows back to back, not a single window.
x = valid.flatten()

# (disabled) peak / valley detection on the concatenated trace
# x_flip = flip_signal(x)

# peaks = ppg_findpeaks(x, sampling_rate=125)['PPG_Peaks']
# n_peaks = len(peaks)

# valleys = ppg_findpeaks(x_flip, sampling_rate=125)['PPG_Peaks']
# n_valleys = len(valleys)

fig = figure(title='5s segment')
# x axis is the sample index 0 .. len(x) - 1
fig.line(np.linspace(0, len(x) - 1, len(x)), x)
# fig.circle(peaks, x[peaks], radius=10, color='red')
# fig.circle(valleys, x[valleys], radius=10, color='green')
show(fig)

In [None]:
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from utils import runcmd, hampel
# from wfdb import rdrecord
# from os.path import exists
# from sklearn.preprocessing import StandardScaler
# from scipy.signal import butter, sosfiltfilt, medfilt, cheby2
# from neurokit2.ppg import ppg_findpeaks
# from bokeh.plotting import output_notebook, figure, show
# from heartpy.peakdetection import make_windows
# from heartpy.preprocessing import flip_signal

# peaks = ppg_findpeaks(pleth, sampling_rate=125)['PPG_Peaks']
# peak_max = np.abs(np.max(pleth[peaks]))
# peak_min = np.abs(np.min(pleth[peaks]))
# print(peak_min, peak_max)

# valleys = ppg_findpeaks(flip_signal(pleth), sampling_rate=125)['PPG_Peaks']
# valley_max = np.abs(np.max(pleth[valleys]))
# valley_min = np.abs(np.min(pleth[valleys]))
# print(valley_min, valley_max)

# anom = []
# for idx in peaks:
#     x = pleth[idx]
#     if (x < (peak_min * 2)) | (x > (peak_max / 2)):
#         anom.append(idx)
# anom = np.array(anom)
# anom

In [None]:
# Window boundaries over `pleth` with sample_rate=125, windowsize=5 and min_size=625 (= 5 * 125).
# NOTE(review): `pleth` is not assigned in any live cell shown here — confirm where it comes from.
idx = make_windows(pleth, sample_rate=125, windowsize=5, min_size=625)
idx.shape

In [None]:
# Cut `pleth` into one row per (start, stop) pair in `idx`.
# Rebinds the notebook-level name `windows`.
windows = np.array([np.array(pleth[i:j]) for i, j in idx])
windows.shape

In [None]:
# Keep only the windows whose peaks and valleys look consistent.
# NOTE(review): `anom`, `peak_min` and `peak_max` are only assigned in
# commented-out code in this notebook; this cell needs them defined first.
valid_segs = []
for i, seg in enumerate(windows[::]):
    # absolute signal values at the detected peaks, and the magnitude of their mean
    peaks = np.abs(seg[ppg_findpeaks(seg, sampling_rate=125)['PPG_Peaks']])
    peak_avg = np.abs(np.average(peaks))
    
    # same for valleys, located as the peaks of the flipped segment
    valleys = np.abs(seg[ppg_findpeaks(flip_signal(seg), sampling_rate=125)['PPG_Peaks']])
    valley_avg = np.abs(np.average(valleys))
    
    # sample range of this window, assuming 625 samples per window
    lb, ub = (i * 625), ((i * 625) + 625)
    # skip windows containing an index listed in `anom`
    if np.logical_and(anom > lb, anom < ub).any():
        continue
    # skip when any peak is below half or above twice the window's mean peak
    elif (peaks < (peak_avg / 2)).any() | (peaks > (peak_avg * 2)).any():
        continue
    # skip when any peak falls outside (peak_min * 2, peak_max / 2)
    elif (peaks < (peak_min * 2)).any() | (peaks > (peak_max / 2)).any():
        continue
    # skip when any valley is below half or above twice the window's mean valley
    elif (valleys < (valley_avg / 2)).any() | (valleys > (valley_avg * 2)).any():
        continue
    # elif (valleys > (valley_min * 2)).any() | (valleys > (valley_max / 2)).any():
    #     continue
    else:
        valid_segs.append(seg)
valid_segs = np.array(valid_segs)
valid_segs.shape

In [None]:
# Print the position of each accepted segment whose minimum falls below -4
for pos, segment in enumerate(valid_segs):
    if not np.min(segment) < -4:
        continue
    print(pos)

In [None]:
# Plot accepted segment 504 with its detected peaks (red) and valleys (green).
x = valid_segs[504].flatten()
peaks = ppg_findpeaks(x, sampling_rate=125)['PPG_Peaks']
# valleys are located as the peaks of the flipped segment
valleys = ppg_findpeaks(flip_signal(x), sampling_rate=125)['PPG_Peaks']

fig = figure(title='5s segment')
# x axis is the sample index 0 .. len(x) - 1
fig.line(np.linspace(0, len(x) - 1, len(x)), x)
fig.circle(peaks, x[peaks], radius=10, color='red')
fig.circle(valleys, x[valleys], radius=10, color='green')
show(fig)

In [None]:
# Run the project's `hampel` helper on the first window, with window_size=100 and n=5.
# NOTE(review): presumably yields outlier positions when imputation=False — confirm against utils.hampel.
anomalies = np.array(hampel(pd.Series(windows[0, :]), window_size=100, n=5, imputation=False))
anomalies

In [None]:
# Peak positions detected in the first window (taken from the 'PPG_Peaks' entry)
peaks = ppg_findpeaks(windows[0, :], sampling_rate=125)['PPG_Peaks']
peaks

In [None]:
# Plot the first window against its sample index and mark the peaks in `peaks` in red
fig = figure(title='5s segment')
fig.line(np.linspace(0, len(windows[0, :]) - 1, len(windows[0, :])), windows[0, :])
fig.circle(peaks, windows[0, :][peaks], radius=10, color='red')
show(fig)

In [None]:
# identify flatline segments
# A position counts as flat when each of the three neighbouring pairs among
# samples i .. i+3 differs by no more than `buffer`.  Runs of flat positions
# are recorded in remove_idx as [head, tail] index pairs.
buffer = 0
remove_idx = []
i, head, tail = 0, None, None
while (i + 2) < (len(pleth) - 2):
    if (not (pleth[i] > (pleth[i + 1] + buffer)) | (pleth[i] < pleth[i + 1] - buffer)) & \
       (not (pleth[i + 1] > (pleth[i + 2] + buffer)) | (pleth[i + 1] < pleth[i + 2] - buffer)) & \
       (not (pleth[i + 2] > (pleth[i + 3] + buffer)) | (pleth[i + 2] < pleth[i + 3] - buffer)):
        if head is None:
            head = i
    else:
        if head is not None:
            tail = i + 1
            remove_idx.append([head, tail])
        head = None
    i += 2
# A flat run that is still open when the scan stops would otherwise be lost.
# The previous step verified samples up to i + 1, so close it the same way
# as inside the loop.
if head is not None:
    tail = i + 1
    remove_idx.append([head, tail])
if remove_idx:
    print(remove_idx)

In [None]:
# Split `pleth` into the stretches between anomalies, leaving `tol` samples
# of margin on either side of each anomaly.
# NOTE(review): prev == 0 doubles as the "no anomaly seen yet" marker, so an
# anomaly at index 0 is treated as if none had been seen — confirm intent.
segs = []
prev = 0
tol = 1000
for idx in anomalies:
    if prev == 0:
        print(idx)
        # Clamp the stop index: a negative value would slice from the end of
        # the signal instead of producing an empty leading stretch.
        segs.append(pleth[prev:max(idx - tol, 0)])
        prev = idx
    elif prev == idx - 1 or prev + tol > idx:
        # adjacent to, or within `tol` of, the previous anomaly: same cluster
        prev = idx
    else:
        print(idx)
        segs.append(pleth[prev + tol:idx - tol])
        prev = idx
# everything after the last anomaly (plus margin)
segs.append(pleth[prev + tol::])

In [None]:
# Concatenate the retained stretches into one flat list of samples
pleth_segs = [sample for chunk in segs for sample in chunk]

In [None]:
# fig = figure(title='meep morp')
# fig.line(np.linspace(0, len(pleth) - 1, len(pleth)), pleth)
# fig.circle(peaks, pleth[peaks], radius=10, color='red')
# show(fig)

# anomalies = np.array(hampel(pd.Series(pleth), window_size=100, n=5, imputation=False))