In [None]:
import os
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from database_tools.tools import BuildDatabase
from database_tools.preprocessing.utils import ConfigMapper

# Root directory of the mimic3 extraction used by the cells below.
# Set the MIMIC3_DATA_DIR environment variable to point the notebook at a
# different location; without it the original local path is used.
# Joining with '' guarantees a trailing separator, which the later
# `data_dir + 'file'` concatenations rely on.
data_dir = os.path.join(
    os.environ.get(
        'MIMIC3_DATA_DIR',
        '/home/cam/Documents/database_tools/data/mimic3-data-20230205/',
    ),
    '',
)

In [None]:
# Settings handed to ConfigMapper below.
config = {
    'checks': ['snr', 'hr', 'beat'],
    'fs': 125,                       # sampling frequency
    'win_len': 256,                  # window length
    'freq_band': [0.5, 8.0],         # bandpass frequencies
    'sim': 0.6,                      # similarity threshold
    'snr': 2.0,                      # SNR threshold
    'hr_freq_band': [0.667, 3.0],    # valid heart-rate frequency band in Hz
    'hr_delta': 1/6,                 # max heart-rate difference between ppg and abp
    'dbp_bounds': [20, 130],         # thresholds for DBP
    'sbp_bounds': [50, 225],         # thresholds for SBP
    'windowsize': 1,                 # window size for rolling mean
    'ma_perc': 20,                   # multiplier for peak detection
    'beat_sim': 0.2,                 # lower threshold for beat similarity
}
cm = ConfigMapper(config=config)

# Configure the builder against data_dir with the per-file, per-patient and
# overall sample limits, then run it with the mapped config.
bd = BuildDatabase(data_dir=data_dir, samples_per_file=2500, samples_per_patient=500, max_samples=200000)
bd.run(cm)

In [None]:
# Let pandas display up to 100 rows.
pd.set_option('display.max_rows', 100)

# Read the stats CSV that sits directly under data_dir.
stats = pd.read_csv(f'{data_dir}mimic3_stats.csv')

In [None]:
# Load one JSON-lines file (mimic3_000) into a DataFrame.
data = pd.read_json(f'{data_dir}data/lines/mimic3_000.jsonlines', lines=True)

# Convert the 'ppg' and 'abp' columns into NumPy arrays.
ppg = np.array(data['ppg'].tolist())
abp = np.array(data['abp'].tolist())

In [None]:
# Plot the first row of ppg as a scatter trace on a FigureWidget.
fig = go.FigureWidget()
fig.add_scatter(y=ppg[0, :])

In [1]:
from database_tools.tools.records import Dataset, generate_records

# NOTE: this rebinds `data_dir` to the `data/` subdirectory of the extraction.
# Other cells in this notebook append file names to `data_dir`
# ('mimic3_stats.csv', 'data/lines/...') and expect the parent directory, so
# re-run the cell that first defines `data_dir` before re-running those cells.
data_dir = '/home/cam/Documents/database_tools/data/mimic3-data-20230205/data/'
# Build a Dataset over that directory.
ds = Dataset(data_dir=data_dir)
# Disabled call to generate_records, kept for reference. Its scaler_path points
# at a pickle under the older data-2022-11-08 directory.
# data, scalers = generate_records(
#     ds=ds,
#     data_dir=data_dir,
#     scaler_path='/home/cam/Documents/database_tools/data/data-2022-11-08/mimic3/scalers_MinMax.pkl',
# )

INFO:root:Data was extracted from 80 JSONLINES files.
INFO:root:The total number of windows is 200000.


In [3]:
# Build a second Dataset over the data-2022-11-08 directory.
data_dir_old = '/home/cam/Documents/database_tools/data/data-2022-11-08/mimic3/'
ds_old = Dataset(
    data_dir=data_dir_old,
)

INFO:root:Data was extracted from 80 JSONLINES files.
INFO:root:The total number of windows is 200000.


In [2]:
import pandas as pd

# Histogram of all values in ds.ppg, flattened to one dimension, in 50 bins.
# The bin count is passed as `bins`, the parameter Series.plot.hist defines.
# `nbins` is not one of its parameters; with the default matplotlib backend
# it is forwarded to matplotlib as an unknown keyword and raises.
pd.Series(ds.ppg.flatten()).plot.hist(bins=50)