# Imports and Setup

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from database_tools.tools import BuildDatabase, DataEvaluator, records
from database_tools.filtering.utils import ConfigMapper, build_data_directory

# Local checkout of the repository. The trailing slash is required because
# sub-paths are appended to this string directly.
repo_dir = '/home/cam/Documents/database_tools/'
data_root = f'{repo_dir}data/'
# NOTE(review): presumably resolves/creates the dated 'mimic3' folder under
# data_root and returns its path as a string — confirm in database_tools.
data_dir = build_data_directory(data_root, 'mimic3', date='20230405')

# Build Database

In [None]:
# Signal-quality filter settings; wrapped by ConfigMapper below.
config = {
    'checks': ['snr', 'hr', 'beat', 'notch'],
    'fs': 125,                          # sampling frequency
    'win_len': 256,                     # window length
    'freq_band': [0.5, 8.0],            # bandpass frequencies
    'sim': 0.6,                         # similarity threshold
    'snr': 2.0,                         # SNR threshold
    'hr_freq_band': [0.667, 3.0],       # valid heart rate frequency band in Hz
    'hr_delta': 1/6,                    # maximum heart rate difference between ppg, abp
    'dbp_bounds': [20, 130],            # lower and upper threshold for DBP
    'sbp_bounds': [50, 225],            # lower and upper threshold for SBP
    'flat_line_length': 10,             # max length of flat lines
    'windowsize': 1,                    # window size for rolling mean
    'ma_perc': 20,                      # multiplier for peak detection
    'beat_sim': 0.2,                    # lower threshold for beat similarity
    'min_notches': 1,                   # minimum number of dicrotic notches in a window
}
cm = ConfigMapper(config=config)

# TODO: Test a 50k sample on more stringent parameters

# Sampling limits for this build, kept together so they are easy to tweak.
build_limits = {
    'samples_per_file': 2500,
    'samples_per_patient': 500,
    'max_samples': 50000,
}
bd = BuildDatabase(data_dir=data_dir, **build_limits)
# Run the build with the filter configuration wrapped in `cm`.
bd.run(cm)

# Generate TFRecords

In [None]:
ds = records.Dataset(data_dir)

# NOTE(review): split_strategy presumably gives train/val/test fractions, and
# scaler_path=None presumably means no saved scaler is loaded — confirm
# against records.generate_records.
record_options = {
    'data_dir': data_dir,
    'split_strategy': (0.7, 0.15, 0.15),
    'samples_per_file': 10000,
    'scaler_path': None,
}
record_outputs = records.generate_records(ds, **record_options)
data_unscaled, data_scaled, scaler_dict = record_outputs

In [None]:
# Pull the scaled training split for both signals.
ppg_train, abp_train = data_scaled['ppg']['train'], data_scaled['abp']['train']

In [None]:
# Visual spot check: row 10 of the scaled PPG training data.
train_window = ppg_train[10, :]
fig = go.FigureWidget()
# Left as the cell's last expression so the notebook renders its return value.
fig.add_scatter(y=train_window)

In [None]:
# Pull the scaled test split for both signals.
ppg_test, abp_test = data_scaled['ppg']['test'], data_scaled['abp']['test']

In [None]:
# Visual spot check: row 10 of the scaled PPG test data.
test_window = ppg_test[10, :]
fig = go.FigureWidget()
# Left as the cell's last expression so the notebook renders its return value.
fig.add_scatter(y=test_window)

# Evaluate Dataset

In [None]:
# Allow up to 200 rows when pandas objects are displayed.
pd.set_option('display.max_rows', 200)

# Read the per-sample stats table and keep only rows flagged in 'valid'.
stats = pd.read_csv(f'{data_dir}mimic3_stats.csv')
valid_stats = stats[stats['valid']]
de = DataEvaluator(valid_stats)
plots = de.run()
# Show which entries are available in the returned mapping.
plots.keys()

In [None]:
# Index of the jsonlines file to inspect; zero-padded to 3 digits in the name.
i = 0
lines_path = f'{data_dir}data/lines/mimic3_{i:03d}.jsonlines'
data = pd.read_json(lines_path, lines=True)

# Convert the 'ppg' and 'abp' columns into NumPy arrays.
ppg, abp = np.array(data['ppg'].to_list()), np.array(data['abp'].to_list())

In [None]:
# Visual spot check: row 1001 of the ABP array loaded from the lines file.
abp_window = abp[1001, :]
fig = go.FigureWidget()
# Left as the cell's last expression so the notebook renders its return value.
fig.add_scatter(y=abp_window)