In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from database_tools.tools import BuildDatabase
from database_tools.preprocessing.utils import build_data_directory, ConfigMapper

# Machine-specific absolute path to the database_tools directory.
# NOTE(review): hard-coded; will need editing on any other machine.
repo_dir = '/home/cam/Documents/database_tools/'

# Hand '<repo_dir>data/' and the dataset name 'mimic3' to build_data_directory;
# the returned value is used later in this notebook as a path prefix.
data_dir = build_data_directory(f'{repo_dir}data/', 'mimic3')

In [3]:
# Scratch check: -2 is a non-zero integer and therefore truthy,
# so the print below always executes.
if bool(-2):
    print(1)

In [2]:
# Settings later wrapped in a ConfigMapper and handed to the database builder.
config = {
    'checks': ['snr', 'hr', 'beat', 'notch'],
    'fs': 125,                        # sampling frequency
    'win_len': 256,                   # window length
    'freq_band': [0.5, 8.0],          # bandpass frequencies
    'sim': 0.6,                       # similarity threshold
    'snr': 2.0,                       # SNR threshold
    'hr_freq_band': [0.667, 3.0],     # valid heart-rate frequency band in Hz
    'hr_delta': 1/6,                  # maximum heart-rate difference between ppg and abp
    'dbp_bounds': [20, 130],          # lower and upper thresholds for DBP
    'sbp_bounds': [50, 225],          # lower and upper thresholds for SBP
    'flat_line_length': 10,           # max length of flat lines
    'windowsize': 1,                  # window size for the rolling mean
    'ma_perc': 20,                    # multiplier for peak detection
    'beat_sim': 0.2,                  # lower threshold for beat similarity
    'min_notches': 1,                 # minimum number of dicrotic notches in a window
}

# Wrap the settings dict in a ConfigMapper; this is the object `run` receives below.
cm = ConfigMapper(config=config)

# Keyword arguments for the builder, collected in one place and unpacked.
builder_options = {
    'data_dir': data_dir,
    'samples_per_file': 10,
    'samples_per_patient': 500,
    'max_samples': 10,
}
bd = BuildDatabase(**builder_options)

# NOTE(review): the output saved with this cell ends in
# `IndexError: list index out of range` after reporting 4 collected samples,
# so the run did not complete as recorded. Investigate before trusting later cells.
bd.run(cm)

Gettings valid segments...
Collecting samples...
Rejected samples: 0 --- Samples collected: 0


  0%|          | 0/3627 [00:00<?, ?it/s]

Rejected samples: 3623 --- Samples collected: 4


  0%|          | 0/350 [00:00<?, ?it/s]

IndexError: list index out of range

In [None]:
# Allow pandas to render up to 100 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 100)

# Read the stats CSV that sits directly under data_dir.
stats = pd.read_csv(f'{data_dir}mimic3_stats.csv')

In [None]:
# Display the stats table read from mimic3_stats.csv.
stats

In [None]:
# Read the first JSON-lines file under data_dir (one JSON record per line).
data = pd.read_json(f'{data_dir}data/lines/mimic3_000.jsonlines', lines=True)

# Convert the 'ppg' and 'abp' columns to NumPy arrays via plain Python lists.
# Presumably each cell holds one window of samples, giving 2-D arrays - confirm.
ppg, abp = (np.array(data[column].to_list()) for column in ('ppg', 'abp'))

# Empty interactive figure; traces are added in the following cells.
fig = go.FigureWidget()

In [None]:
# Add row 0 of ppg to the figure widget as a scatter trace.
fig.add_scatter(y=ppg[0, :])

In [None]:
# Replace the y-values of the figure's first trace with row 12 of ppg
# (reuses the existing trace instead of adding another one).
# NOTE(review): this needs ppg to have at least 13 rows; the build cell in this
# notebook passes samples_per_file=10, so a single file may be too short - confirm.
fig.data[0].y = ppg[12, :]

In [33]:
import numpy as np
from database_tools.tools.records import Dataset, get_split_idx, split_data

# Integer date stamp that is embedded in the dataset directory name below.
dt = 20230220

# NOTE(review): this rebinds `data_dir`, which an earlier cell set from
# build_data_directory, to a hard-coded machine-specific path.
data_dir = f'/home/cam/Documents/database_tools/data/mimic3-data-{dt}/data/'

ds = Dataset(data_dir=data_dir)

# Number of rows along the first axis of ds.ppg, used as the size to split.
n_windows = ds.ppg.shape[0]
# Presumably the train / val / test fractions, matching the split names used later - confirm.
split_fractions = (0.7, 0.15, 0.15)

idx = get_split_idx(n=n_windows, split_strategy=split_fractions)
data_unscaled = split_data(ds, idx)

_SIGNAL_KEYS = ['ppg', 'vpg', 'apg', 'abp']


def _min_max_scale(values, low, high):
    """Linearly rescale `values` so that `low` maps to 0 and `high` maps to 1."""
    return np.divide(values - low, high - low)


# One inner dict per signal, filled with the scaled 'train', 'val' and 'test' arrays.
data_scaled = {name: {} for name in _SIGNAL_KEYS}
# Per-signal [min, max] pairs used for scaling, kept so the scaling can be undone.
train_scaler = {}
test_scaler = {}

for key in _SIGNAL_KEYS:
    splits = data_unscaled[key]

    # 'train' and 'val' are both scaled with the min/max of the training split.
    train_low, train_high = np.min(splits['train']), np.max(splits['train'])
    train_scaler[key] = [train_low, train_high]
    for split_name in ('train', 'val'):
        data_scaled[key][split_name] = _min_max_scale(splits[split_name], train_low, train_high)

    # NOTE(review): 'test' is scaled with its own min/max rather than the
    # training statistics - confirm this is intentional.
    test_low, test_high = np.min(splits['test']), np.max(splits['test'])
    test_scaler[key] = [test_low, test_high]
    data_scaled[key]['test'] = _min_max_scale(splits['test'], test_low, test_high)

INFO:root:Data was extracted from 80 JSONLINES files.
INFO:root:The total number of windows is 200000.


In [35]:
import plotly.graph_objects as go

# New figure widget showing row 0 of the scaled 'ppg' training split.
fig = go.FigureWidget()
first_train_ppg = data_scaled['ppg']['train'][0, :]
# Kept as the cell's last expression so the widget is rendered as the cell output.
fig.add_scatter(y=first_train_ppg)

FigureWidget({
    'data': [{'type': 'scatter',
              'uid': 'b61ea74e-78b1-484f-b443-73c82dba30c9',
              'y': array([0.32469035, 0.32213725, 0.32080178, ..., 0.65249943, 0.64635662,
                          0.64006747])}],
    'layout': {'template': '...'}
})