In [None]:
import os
from database_tools.tools import BuildDatabase

# Work from the repository root. The default is the original author's checkout;
# set the DATABASE_TOOLS_DIR environment variable to run from another location.
repo_dir = os.environ.get('DATABASE_TOOLS_DIR', '/home/cam/Documents/database_tools/')
os.chdir(repo_dir)

# Signal-quality settings handed to BuildDatabase.
config = dict(
    low=0.5,                    # low bandpass frequency
    high=8.0,                   # high bandpass frequency
    sim=0.6,                    # similarity threshold
    df=0.2,                     # one sided frequency delta for SNR calculation
    snr_t=2.0,                  # SNR threshold
    hr_diff=1/6,                # maximum heart rate difference between ppg, abp
    f0_low=0.667,               # minimum valid HR in Hz
    f0_high=3.0,                # maximum valid HR in Hz
    abp_min_bounds=[40, 100],   # lower and upper threshold for DBP
    abp_max_bounds=[70, 190],   # lower and upper threshold for SBP
    pp_min=25,                  # pulse pressure lower threshold
    pp_max=75,                  # pulse pressure upper threshold
    n_peaks=2,                  # minimum number of peaks in a window
    windowsize=2,               # windowsize for rolling mean
    ppg_ma_perc=20,             # multiplier for ppg peak detection
    abp_ma_perc=1,              # multiplier for abp peak detection
    beat_sim=0.3,               # lower threshold for beat similarity
)

# NOTE(review): output_dir and data_dir are relative paths; presumably they are
# resolved against the working directory set above — confirm in BuildDatabase.
worker = BuildDatabase(
    output_dir='data-2022-10-28/',
    config=config,
    win_len=256,
    fs=125,
    samples_per_file=2500,
    samples_per_patient=500,
    max_samples=200000,
    data_dir='physionet.org/files/mimic3wdb/1.0/',
)

worker.run()

In [None]:
import os
import pandas as pd
from database_tools.tools import DataEvaluator

# Let pandas render up to 2500 rows before truncating the display.
pd.set_option('display.max_rows', 2500)

# NOTE(review): this path points one level above the working directory, while
# the other cells in this notebook use 'data-2022-10-28/' directly — confirm
# which directory this cell is expected to run from.
stats_csv = '../data-2022-10-28/mimic3_stats.csv'
df = pd.read_csv(stats_csv)

# Evaluate the loaded statistics; run() returns the resulting figures.
worker = DataEvaluator(stats=df)
figs = worker.run()

In [None]:
import itertools
import numpy as np
import pandas as pd
from heartpy.preprocessing import flip_signal
from heartpy.peakdetection import detect_peaks
from heartpy.datautils import rolling_mean
from database_tools.preprocessing.SignalLevelFiltering import get_similarity

def make_equal_len(x, y):
    """Zero-pad the shorter of two 1-D sequences at its end so both match in length.

    Returns the pair ``(x, y)``. When ``x`` is strictly longer, ``y`` is padded
    and ``x`` is returned untouched; otherwise ``x`` goes through ``np.pad``
    (with zero extra samples if the lengths already agree) and ``y`` is
    returned untouched.
    """
    gap = len(x) - len(y)
    if gap > 0:
        y = np.pad(y, pad_width=[0, gap])
    else:
        x = np.pad(x, pad_width=[0, -gap])
    return x, y

def _locate_extrema(signal, windowsize, ma_perc, fs):
    """Detect peaks and valleys of ``signal`` with HeartPy on a zero-padded copy.

    Valleys are the peaks HeartPy reports on the flipped signal. Both results
    are returned as NumPy arrays of shifted indices.
    """
    # NOTE(review): the signal is padded with 9 samples per side, but detected
    # indices are shifted back by 10. Confirm whether the one-sample difference
    # is intentional before changing either number.
    padded = np.pad(signal, pad_width=[9, 9])

    rol = rolling_mean(padded, windowsize=windowsize, sample_rate=fs)
    found = detect_peaks(padded, rol, ma_perc=ma_perc, sample_rate=fs)
    peaks = np.array(found['peaklist']) - 10

    flipped = flip_signal(padded)
    rol = rolling_mean(flipped, windowsize=windowsize, sample_rate=fs)
    found = detect_peaks(flipped, rol, ma_perc=ma_perc, sample_rate=fs)
    valleys = np.array(found['peaklist']) - 10

    return peaks, valleys


def beat_similarity(x, windowsize, ma_perc, fs=125):
    """Score how alike the individual beats inside one signal window are.

    The window is cut into beats at the detected valleys, the beats are
    left-padded with zeros, and ``get_similarity`` is accumulated over every
    pair of beats that has more than ``fs / 2`` non-zero samples.

    Args:
        x: one signal window (callers in this notebook pass a 1-D array row).
        windowsize: forwarded to HeartPy ``rolling_mean``.
        ma_perc: forwarded to HeartPy ``detect_peaks``.
        fs: sampling rate, forwarded to HeartPy as ``sample_rate``.

    Returns:
        ``[score, n_beats]``, where ``n_beats`` is the number of beats kept and
        ``score`` is the summed pairwise similarity divided by ``n_beats``.
        ``[-1, -1]`` when a peak index coincides with a valley index or when
        peaks and valleys do not alternate; ``[0, 0]`` when no beat is kept.
    """
    peaks, valleys = _locate_extrema(x, windowsize, ma_perc, fs)

    # An index reported both as a peak and as a valley invalidates the window.
    if np.isin(peaks, valleys).any():
        return [-1, -1]

    # Each valley must fall in the peak interval right after the previous
    # valley's interval, i.e. exactly one peak between neighbouring valleys.
    slots = np.digitize(valleys, peaks)
    if not (slots[:-1] == slots[1:] - 1).all():
        return [-1, -1]

    if len(peaks) <= len(valleys):
        # Longest beat first; every other beat is left-padded to that length.
        segments = sorted(np.split(x, valleys), key=len, reverse=True)
        longest = segments[0]
        aligned = [longest]
        aligned.extend(
            np.pad(seg, pad_width=[len(longest) - len(seg), 0])
            for seg in segments[1:]
        )
    else:
        segments = sorted(np.split(x, valleys[1:]), key=len, reverse=True)
        aligned = [segments[0]]
        # NOTE(review): segments are ordered by length here, yet peaks/valleys
        # are indexed by the segment's position in that ordering; the pad
        # amount can also come out negative, which np.pad rejects. Verify.
        for k, seg in enumerate(segments[1:]):
            lead = peaks[0] - (peaks[k + 1] - valleys[k])
            aligned.append(np.pad(seg, pad_width=[lead, 0]))

    # Keep only beats with more than half a second's worth of non-zero samples.
    min_nonzero = fs / 2
    kept = [beat for beat in aligned if np.count_nonzero(beat) > min_nonzero]
    if not kept:
        return [0, 0]

    total = 0
    for first, second in itertools.combinations(kept, 2):
        a, b = make_equal_len(first, second)
        total += get_similarity(a, b)

    # NOTE(review): the pairwise sum is divided by the number of beats, not by
    # the number of pairs — confirm this is the intended normalisation.
    return [total / len(kept), len(kept)]

In [None]:
import os
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import plotly.graph_objects as go

pd.options.plotting.backend = 'plotly'

# Work from the repository root. The default is the original author's checkout;
# set the DATABASE_TOOLS_DIR environment variable to run from another location.
repo_dir = os.environ.get('DATABASE_TOOLS_DIR', '/home/cam/Documents/database_tools/')
os.chdir(repo_dir)

# Peak-detection settings per signal type:
#   ppg: windowsize=2, ma_perc=20
#   abp: windowsize=2, ma_perc=1
# Only 'abp' is scored below; to score ppg instead, read sample['ppg'] and
# pass ma_perc=20.

# Files 0..10 are processed. The loop previously ranged over 120 files but
# always stopped after index 10, so the progress bar never reflected the work.
n_files = 11

stats = []
for i in tqdm(range(n_files)):
    path = f'data-2022-10-28/mimic3/lines/mimic3_{i:07d}.jsonlines'
    # One JSON document per line; collect the 'abp' entry of each.
    with open(path, 'r') as f:
        abp = np.array([json.loads(line)['abp'] for line in f])
    stats += [beat_similarity(x, windowsize=2, ma_perc=1) for x in abp]

# Column 0 is the similarity score, column 1 the beat count.
stats = np.array(stats)
sim = stats[:, 0]
n_beats = stats[:, 1]
pd.Series(sim).plot.hist()

In [None]:
# Histogram of the second value returned by beat_similarity for each sample
# (the number of beats kept, or the -1 / 0 sentinel for rejected samples).
pd.Series(n_beats).plot.hist()

In [None]:
# List (position, similarity) pairs for the first 2500 entries of sim so that
# an individual score can be looked up by its position.
list(enumerate(pd.Series(sim).head(2500)))

In [None]:
# Load every 'abp' entry from the first output file (one JSON document per line).
abp = []
with open('data-2022-10-28/mimic3/lines/mimic3_0000000.jsonlines', 'r') as f:
    for line in f:
        sample = json.loads(line)
        abp.append(sample['abp'])
abp = np.array(abp)

# Inspect a single row, zero-padded on both sides before peak detection.
x = abp[905, :]
x_pad = np.pad(x, pad_width=[9, 9])

fs = 125

# NOTE(review): the padding is 9 samples per side but indices are shifted back
# by 10 — confirm whether the one-sample difference is intentional.
index_shift = 10

# Peaks of the padded signal.
rol_mean = rolling_mean(x_pad, windowsize=2, sample_rate=fs)
peaks = detect_peaks(x_pad, rol_mean, ma_perc=1, sample_rate=fs)['peaklist']
peaks = np.array(peaks) - index_shift

# Valleys are the peaks of the flipped signal.
flip = flip_signal(x_pad)
rol_mean = rolling_mean(flip, windowsize=2, sample_rate=fs)
valleys = detect_peaks(flip, rol_mean, ma_perc=1, sample_rate=fs)['peaklist']
valleys = np.array(valleys) - index_shift

# Trace order: the signal itself, then peak markers, then valley markers.
fig = go.FigureWidget()
fig.update_layout(template='plotly_dark')
fig.add_scatter(y=x)
for marker_idx in (peaks, valleys):
    fig.add_scatter(x=marker_idx, y=x[marker_idx], mode='markers')
fig

In [None]:
# Number of entries in sim that exceed 0.3.
int((sim > 0.3).sum())

In [None]:
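# Disabled debug plot: draws every entry of aligned_beats as its own trace.
# NOTE(review): aligned_beats only exists inside beat_similarity, so it has to
# be exposed (e.g. returned) before this cell can be re-enabled.
# fig = go.FigureWidget()
# fig.update_layout(
#     template='plotly_dark',
# )
# for j in aligned_beats:
#     fig.add_scatter(
#         y=j,
#     )
# fig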