In [3]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from database_tools.tools import BuildDatabase
from database_tools.preprocessing.utils import build_data_directory, ConfigMapper

# NOTE(review): absolute, machine-specific path; consider making it configurable.
repo_dir = '/home/cam/Documents/database_tools/'

# Root folder handed to the project helper together with the dataset name.
# The value it returns is later concatenated with file names, so it is
# presumably a string ending in a path separator -- TODO confirm.
data_root = repo_dir + 'data/'
data_dir = build_data_directory(data_root, 'mimic3')

In [None]:
# Settings for the window validity checks; wrapped by ConfigMapper before the
# database build is started.
config = {
    'checks': ['snr', 'hr', 'beat', 'notch'],  # names of the checks to apply
    'fs': 125,                          # sampling frequency
    'win_len': 256,                     # window length
    'freq_band': [0.5, 8.0],            # bandpass frequencies
    'sim': 0.6,                         # similarity threshold
    'snr': 2.0,                         # SNR threshold
    'hr_freq_band': [0.667, 3.0],       # valid heart-rate frequency band in Hz
    'hr_delta': 1/6,                    # maximum heart-rate difference between PPG and ABP
    'dbp_bounds': [20, 130],            # lower and upper bounds for DBP
    'sbp_bounds': [50, 225],            # lower and upper bounds for SBP
    'flat_line_length': 10,             # max length of flat lines
    'windowsize': 1,                    # window size for rolling mean
    'ma_perc': 20,                      # multiplier for peak detection
    'beat_sim': 0.2,                    # lower threshold for beat similarity
    'min_notches': 1,                   # minimum number of dicrotic notches in a window
}

# Wrap the plain settings dict in the project's ConfigMapper.
cm = ConfigMapper(config=config)

# Options for the database builder, collected in one place so they are easy
# to tweak between runs.
build_options = {
    'data_dir': data_dir,
    'samples_per_file': 10,
    'samples_per_patient': 500,
    'max_samples': 10,
}
bd = BuildDatabase(**build_options)

# Start the build using the mapped configuration.
bd.run(cm)

In [14]:
# Allow up to 200 rows to be rendered before pandas truncates the display.
pd.set_option('display.max_rows', 200)

# Load the per-sample statistics file from the data directory and show it.
stats_path = data_dir + 'mimic3_stats.csv'
stats = pd.read_csv(stats_path)
stats

Unnamed: 0,mrn,valid,time_sim,spec_sim,ppg_snr,abp_snr,ppg_hr,abp_hr,dbp,sbp,ppg_beat_sim,abp_beat_sim,flat_ppg,flat_abp,ppg_notches,abp_notches
0,3162326,False,,,3.118229,-10.0,87.890625,0.0,0.0,0.0,0.482569,-1.0,True,False,True,False
1,3162326,False,,,1.44204,-10.0,80.566406,0.0,0.0,0.0,0.224767,-1.0,True,False,True,False
2,3162326,False,,,1.177901,-10.0,76.904297,0.0,0.0,0.0,0.190706,-1.0,True,False,False,False
3,3162326,False,,,-2.627558,-10.0,76.904297,0.0,0.0,0.0,-1.0,-1.0,True,False,False,False
4,3162326,False,,,-2.758812,-10.0,80.566406,0.0,0.0,0.0,0.017122,-1.0,True,False,True,False
5,3162326,False,,,1.268085,-10.0,84.228516,0.0,0.0,0.0,0.453832,-1.0,True,False,True,False
6,3162326,False,,,2.40564,-10.0,32.958984,0.0,0.0,0.0,0.177647,-1.0,True,False,True,False
7,3162326,False,,,3.041237,-10.0,84.228516,0.0,0.0,0.0,0.872012,-1.0,True,False,True,False
8,3162326,False,,,3.013368,-10.0,87.890625,0.0,0.0,0.0,0.476512,-1.0,True,False,True,False
9,3162326,False,,,2.541124,-10.0,87.890625,0.0,0.0,0.0,0.68607,-1.0,True,False,True,False


In [9]:
# Read the first JSON-lines file from the data directory.
lines_path = data_dir + 'data/lines/mimic3_000.jsonlines'
data = pd.read_json(lines_path, lines=True)

# Convert the 'ppg' and 'abp' columns (one list per row) into NumPy arrays.
ppg, abp = (np.array(data[column].to_list()) for column in ('ppg', 'abp'))

In [13]:
# Plot the first row of `ppg` as a scatter trace; the call is left as the
# cell's last expression so its result is displayed.
first_ppg_row = ppg[0, :]

fig = go.FigureWidget()
fig.add_scatter(y=first_ppg_row)

FigureWidget({
    'data': [{'type': 'scatter',
              'uid': '9bc18b5a-cfe9-4e27-aa19-beffa768aaf4',
              'y': array([-0.09194944, -0.10085089, -0.10923564, ...,  0.35062884,  0.32292084,
                           0.29242709])}],
    'layout': {'template': '...'}
})

In [None]:
import numpy as np
from database_tools.tools.records import Dataset, get_split_idx, split_data

# Date stamp that selects which data folder to load.
dt = 20230220
# NOTE(review): absolute, machine-specific path, and this rebinds `data_dir`
# from the first cell -- re-running earlier cells afterwards will read from
# this folder instead.
data_dir = f'/home/cam/Documents/database_tools/data/mimic3-data-{dt}/data/'

ds = Dataset(data_dir=data_dir)

# Split indices are derived from the number of rows in `ds.ppg` using
# 0.7 / 0.15 / 0.15 fractions.
n_samples = ds.ppg.shape[0]
split_fractions = (0.7, 0.15, 0.15)
idx = get_split_idx(n=n_samples, split_strategy=split_fractions)

data_unscaled = split_data(ds, idx)

# Signals to scale; each one has 'train', 'val' and 'test' splits in
# `data_unscaled`.
SIGNAL_KEYS = ['ppg', 'vpg', 'apg', 'abp']


def _value_range(values):
    """Return ``[min, max]`` taken over every element of ``values``."""
    return [np.min(values), np.max(values)]


def _min_max_scale(values, lo, hi):
    """Linearly rescale ``values`` so that ``lo`` maps to 0 and ``hi`` maps to 1."""
    return np.divide(values - lo, hi - lo)


data_scaled = {key: {} for key in SIGNAL_KEYS}
train_scaler = {}
test_scaler = {}

for key in SIGNAL_KEYS:
    splits = data_unscaled[key]

    # 'train' and 'val' are both scaled with the range of the training split.
    train_scaler[key] = _value_range(splits['train'])
    for split in ['train', 'val']:
        data_scaled[key][split] = _min_max_scale(splits[split], *train_scaler[key])

    # NOTE(review): the test split is scaled with its OWN min/max rather than
    # the training range. If the scaled test data is fed to a model fit on the
    # train-scaled data, the two are on different scales -- confirm intended.
    test_scaler[key] = _value_range(splits['test'])
    data_scaled[key]['test'] = _min_max_scale(splits['test'], *test_scaler[key])

In [None]:
import plotly.graph_objects as go

# Plot the first row of the scaled PPG training split; the call is left as
# the cell's last expression so its result is displayed.
first_scaled_ppg_row = data_scaled['ppg']['train'][0, :]

fig = go.FigureWidget()
fig.add_scatter(y=first_scaled_ppg_row)