In [None]:
import os
from database_tools.tools import BuildDatabase

# Work from the repository checkout so the relative data paths below resolve.
repo_dir = '/media/cam/CAPSTONEDB/database_tools/'
os.chdir(repo_dir)

# Settings handed to BuildDatabase as-is (how each key is interpreted is
# defined inside database_tools, not in this notebook).
config = {
    'low': 0.5,
    'high': 8.0,
    'sim1': 0.7,
    'sim2': 0.9,
    'df': 0.2,
    'snr_t': 2,
    'hr_diff': 1/6,
    'f0_low': 0.667,
    'f0_high': 3.0,
    # The two ABP bounds below are the largest possible range of valid bp values.
    'abp_min_bounds': [40, 100],
    'abp_max_bounds': [70, 190],
}

worker = BuildDatabase(
    data_dir='physionet.org/files/mimic3wdb/1.0/',
    output_dir='data-2022-09-29/',
    config=config,
    win_len=256,
    fs=125,
    samples_per_file=2500,
    samples_per_patient=600,
    max_samples=300000,
)

# Run the build and keep whatever run() returns for inspection.
df = worker.run()

In [None]:
import os
import pandas as pd
from database_tools.tools import DataEvaluator

# Allow long frames to be displayed in full (up to 1000 rows).
pd.set_option('display.max_rows', 1000)

# Work from the repository checkout so the relative CSV path resolves.
repo_dir = '/media/cam/CAPSTONEDB/database_tools/'
os.chdir(repo_dir)

# NOTE(review): this reads the 2022-09-25 run, while the build cell writes to
# data-2022-09-29/ - confirm that evaluating the older run is intended.
df = pd.read_csv('data-2022-09-25/mimic3_stats.csv')

worker = DataEvaluator(stats=df)
figs = worker.run()

In [None]:
# Display up to the first 1000 rows of `df` (display.max_rows is raised to 1000
# in the evaluation cell so the frame is not truncated).
df.head(1000)

In [None]:
import numpy as np
import pandas as pd
from scipy import signal

pd.options.plotting.backend = 'plotly'

# Band edges in Hz. With analog=True, scipy expects Wn as an angular frequency
# (rad/s), so the edges are scaled by 2*pi and the response is converted back
# to Hz for plotting; otherwise the "Hz" annotations would not match the axis.
low_hz, high_hz = 0.5, 8.0

b, a = signal.cheby2(
    N=4,
    rs=20,
    Wn=[2 * np.pi * low_hz, 2 * np.pi * high_hz],
    btype='bandpass',
    analog=True,
)
w, h = signal.freqs(b, a)

fig = pd.DataFrame({
    'Amplitude (dB)': 20 * np.log10(np.abs(h)),
    'Frequency (Hz)': w / (2 * np.pi),
}).plot.line(y='Amplitude (dB)', x='Frequency (Hz)', log_x=True)

# Styling shared by both cutoff annotations.
cutoff_style = dict(
    yanchor='bottom',
    font=dict(
        family="sans serif",
        size=18,
        color="crimson"
    ),
    arrowcolor='red',
    arrowside='end',
    arrowhead=3,
    arrowsize=3,
    arrowwidth=1.2,
)

# On a log axis plotly positions annotations in log10 units.
fig.add_annotation(
    x=np.log10(low_hz),
    xanchor='right',
    y=-18,
    text='Lower cutoff frequency (0.5 Hz)',
    ax=-20,
    **cutoff_style,
)
# The last expression returns the figure, so the cell renders it.
fig.add_annotation(
    x=np.log10(8.1),  # nudged slightly right of 8.0 Hz, as in the original plot
    xanchor='left',
    y=-20,
    text='Upper cutoff frequency (8.0 Hz)',
    ay=-30,
    ax=30,
    **cutoff_style,
)

In [None]:
import numpy as np

# Thresholds used by the validity rules below.
low = 0.5
high = 8.0
sim1 = 0.7
sim2 = 0.9
snr_t = 22.5
hr_diff = 1/6
f0_low = 0.667
f0_high = 3.0
abp_min_bounds = [40, 100]
abp_max_bounds = [70, 190]

# Statistics of the single sample being checked.
time_sim = 0.917812
spec_sim = 0.982195
snr_p = 31.567170
snr_a = 18.929158
f0_p = 124.640565 / 60
f0_a = 124.300533 / 60
f0 = np.array([f0_p, f0_a])
min_ = 72.65625
max_ = 140.62500


def is_valid_sample(time_sim, spec_sim, snr_p, snr_a, f0_p, f0_a, min_, max_):
    """
    Apply the sample validity rules to one set of statistics.

    A sample is rejected when any statistic is NaN, when either similarity is
    below `sim1`, when a low SNR or a large f0 mismatch coincides with a
    similarity below `sim2`, when either f0 is outside [f0_low, f0_high], or
    when `min_` / `max_` fall outside `abp_min_bounds` / `abp_max_bounds`.

    Args:
        time_sim (float): Time-domain similarity.
        spec_sim (float): Spectral similarity.
        snr_p (float): First SNR value compared against `snr_t`.
        snr_a (float): Second SNR value compared against `snr_t`.
        f0_p (float): First fundamental frequency.
        f0_a (float): Second fundamental frequency.
        min_ (float): Minimum value, checked against `abp_min_bounds`.
        max_ (float): Maximum value, checked against `abp_max_bounds`.

    Returns:
        bool: True when every rule passes, False otherwise.
    """
    stats = np.array(
        [time_sim, spec_sim, snr_p, snr_a, f0_p, f0_a, min_, max_],
        dtype=float,
    )
    # `np.nan in [...]` only matches the np.nan object itself; computed NaNs
    # are different objects and never compare equal, so use isnan instead.
    if np.isnan(stats).any():
        return False

    below_strict_similarity = (time_sim < sim2) or (spec_sim < sim2)

    if (time_sim < sim1) or (spec_sim < sim1):
        return False
    if ((snr_p < snr_t) or (snr_a < snr_t)) and below_strict_similarity:
        return False
    if (abs(f0_p - f0_a) > hr_diff) and below_strict_similarity:
        return False
    if (min(f0_p, f0_a) < f0_low) or (max(f0_p, f0_a) > f0_high):
        return False
    # The minimum is checked against BOTH ends of abp_min_bounds (the original
    # compared max_ with abp_max_bounds[1] here, leaving the upper bound of the
    # minimum unchecked).
    if not (abp_min_bounds[0] <= min_ <= abp_min_bounds[1]):
        return False
    if not (abp_max_bounds[0] <= max_ <= abp_max_bounds[1]):
        return False
    return True


valid = is_valid_sample(time_sim, spec_sim, snr_p, snr_a, f0_p, f0_a, min_, max_)
valid

In [None]:
import glob
import json
import numpy as np
import pandas as pd

# Parse one JSON document per line from the first MIMIC-III shard.
with open('../data-2022-09-29/mimic3/mimic3_0000000.jsonlines', 'r') as f:
    records = [json.loads(line) for line in f]

# Concatenate the per-sample windows into one flat array per signal.
ppg = np.array([record['ppg'] for record in records]).flatten()
abp = np.array([record['abp'] for record in records]).flatten()

In [None]:
from scipy import signal, integrate

def get_snr(x, low=0.5, high=8.0, df=0.2, fs=125):
    """
    Calculate the signal-to-noise ratio (SNR) of a cardiac signal.

    The power spectral density is estimated with a periodogram (nfft=2048).
    F0 is the frequency at which the spectrum is at its maximum. Signal power
    is the PSD integrated over +/- `df` around F0 and around its first two
    harmonics (2*F0 and 3*F0). Noise power is the PSD integrated over the
    `low`..`high` band minus the signal power.

    Args:
        x (np.ndarray): Cardiac signal data.
        low (float, optional): Lower frequency in Hz. Defaults to 0.5.
        high (float, optional): Upper frequency in Hz. Defaults to 8.0.
        df (float, optional): Half-width in Hz of the band integrated around
            F0 and each harmonic. Defaults to 0.2.
        fs (int, optional): Sampling rate of signal. Defaults to 125.

    Returns:
        snr (float): SNR of signal in dB. -10 is returned when the noise
            power is exactly zero; NaN or -inf can still result when the
            power ratio is negative or zero.
        f0 (float): Fundamental frequency of signal in Hz.
    """
    # `simps` was removed in SciPy 1.14; `simpson` is the same rule under its
    # current name. Fall back to `simps` only on very old SciPy releases.
    simpson = getattr(integrate, 'simpson', None) or integrate.simps

    # Estimate spectral power density
    freqs, psd = signal.periodogram(x, fs, nfft=2048)
    f0 = freqs[np.argmax(psd)]
    freq_res = freqs[1] - freqs[0]

    def band_power(f_lo, f_hi):
        """Integrate the PSD over [f_lo, f_hi]; 0 when no bin falls inside."""
        mask = np.logical_and(freqs >= f_lo, freqs <= f_hi)
        # An empty selection (e.g. a harmonic above Nyquist) carries no power
        # and must not be handed to the integrator.
        if not mask.any():
            return 0
        return simpson(psd[mask], dx=freq_res)

    # Signal power: fundamental plus first two harmonics
    p_sig = sum(band_power(k * f0 - df, k * f0 + df) for k in (1, 2, 3))

    # Noise power: everything else inside the cardiac band
    p_cardiac = band_power(low, high)
    p_noise = p_cardiac - p_sig

    # NumPy floats do not raise ZeroDivisionError, so guard explicitly
    # instead of relying on try/except.
    if p_noise == 0:
        return -10.0, f0

    # Suppress warnings for log10 of a zero or negative ratio; the resulting
    # -inf / NaN is returned to the caller unchanged.
    with np.errstate(divide='ignore', invalid='ignore'):
        snr = 10 * np.log10(p_sig / p_noise)
    return snr, f0

# Window length in samples; the window count is derived from the data so a
# shard with fewer than 2500 windows does not produce empty slices.
win_len = 256

snr = []
for i in range(len(ppg) // win_len):
    y = ppg[i * win_len: (i + 1) * win_len]
    # get_snr returns a (snr, f0) pair for the window.
    snr.append(get_snr(y))

In [None]:
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

fig = go.FigureWidget()
fig.add_scatter()
fig.update_layout(
    # The trace is a time-domain PPG window plotted against its sample index,
    # so the x axis is labelled as samples rather than frequency.
    xaxis_title='Sample',
    yaxis_title='Amplitude',
    width=1000,
    height=800,
)

# Index of the 256-sample window to display.
i = 190
fig.data[0].y = ppg[i * 256:(i + 1) * 256]
fig

In [None]:
# Number of (snr, f0) results collected by the per-window SNR loop.
len(snr)

In [None]:
# Each entry of `snr` is an (snr, f0) pair, so the array has two columns.
snr = np.array(snr)
# Count windows whose SNR (column 0) exceeds 2 dB; masking the whole array
# would also count f0 values above 2.
int(np.count_nonzero(snr[:, 0] > 2))

In [None]:
# Inspect the collected SNR results.
snr

In [None]:
# Plot `y`; assuming the cells ran in order, this is the last PPG window
# left over from the SNR loop.
pd.Series(y).plot()

In [None]:
# `freqs` and `psd` only exist inside get_snr, so recompute the periodogram
# here (same fs=125 and nfft=2048 as get_snr) for the window held in `y`.
freqs, psd = signal.periodogram(y, 125, nfft=2048)
pd.DataFrame(dict(freqs=freqs, psd=psd)).plot(x='freqs', y='psd')

In [None]:
# for f in $files; do mongoimport --db MIMICDB --file $f; done

# from pymongo import MongoClient

# uri = 'mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&appName=mongosh+1.6.0'
# client = MongoClient(uri)

# coll = client.MIMICDB.mimic3_0000067

# cursor = coll.find()
# for doc in cursor:
#     print(doc)