# Imports and Setup

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from database_tools.tools.dataset import DatasetFactory
from database_tools.tools import DataEvaluator, records
from database_tools.processing.utils import build_data_directory

# Root of the local database_tools checkout. The trailing slash matters:
# the data path below is formed by plain string joining, not os.path.
repo_dir = '/home/cam/Documents/database_tools/'

# Resolve the working directory for the 'mimic3' dataset stamped 20230408.
# Other cells append file names directly to data_dir (e.g. 'config.ini').
data_dir = build_data_directory(
    f'{repo_dir}data/',
    'mimic3',
    date='20230408',
)

# Build Database

In [None]:
# Create the dataset factory for data_dir and run it.
# NOTE(review): only the data directory is passed in; presumably every other
# setting is picked up from files inside that directory -- TODO confirm.
bd = DatasetFactory(data_dir=data_dir)
bd.run()

# Evaluate Dataset

In [None]:
# Allow pandas to display up to 200 rows before truncating output.
pd.set_option('display.max_rows', 200)

# Load mimic3_stats.csv from data_dir and hand the evaluator only the rows
# selected by the `valid` column, which is used directly as a boolean mask
# (assumes pandas parses it as bool -- TODO confirm).
stats = pd.read_csv(f'{data_dir}mimic3_stats.csv')
de = DataEvaluator(stats[stats['valid']])
plots = de.run()

# Bare trailing expression: the notebook shows the keys of the returned mapping.
plots.keys()

In [None]:
# Index of the shard to inspect; the file name carries it zero-padded to
# three digits (mimic3_000.jsonlines, mimic3_001.jsonlines, ...).
i = 0
data = pd.read_json(f'{data_dir}data/lines/mimic3_{i:03d}.jsonlines', lines=True)

# Turn each signal column into a Python list and then a NumPy array, in the
# same order as before (ppg first, then abp). The plotting cell indexes both
# results as 2-D arrays (`ppg[j, :]`, `abp[j, :]`).
ppg, abp = (np.array(data[column].to_list()) for column in ('ppg', 'abp'))

In [None]:
# Row of the loaded arrays to plot.
j = 0

# Two stacked panels: row j of `ppg` on top, row j of `abp` underneath.
fig = make_subplots(rows=2, cols=1)
fig.add_trace(go.Scatter(y=ppg[j, :]), row=1, col=1)
fig.add_trace(go.Scatter(y=abp[j, :]), row=2, col=1)

# Kept as the final bare expression so the notebook renders its result.
fig.update_layout(width=700)

In [None]:
from database_tools.tools.dataset import Window, ConfigMapper
from database_tools.processing.detect import detect_notches, detect_peaks

# Point ConfigMapper at the config.ini stored inside data_dir.
cm = ConfigMapper(f'{data_dir}config.ini')

# Wrap row j of `ppg` in a Window, using the checks listed in the config.
x = ppg[j, :]
win = Window(x, cm, checks=cm.data.checks)

# NOTE(review): whether `win.valid` depends on get_peaks() having been called
# first is not visible from this notebook -- keep the call order as is.
win.get_peaks()

# Bare trailing expression so the notebook displays the flag.
win.valid

In [None]:
import glob
from alive_progress import alive_bar

# Collect the minimum and maximum of every row of the 'ppg' column across all
# mimic3_*.jsonlines shards under data_dir.
#
# NOTE: this loop rebinds the notebook-level names `data`, `ppg` and `x`,
# which the shard-loading and Window cells also assign. Re-run those cells
# before reusing the names afterwards.
# NOTE(review): total=200000 is hard-coded; presumably it is the expected
# number of rows over all shards -- confirm, or derive it from the data.
min_, max_ = [], []
with alive_bar(total=200000, force_tty=True) as bar:
    for path in glob.iglob(f'{data_dir}data/lines/mimic3_*.jsonlines'):
        data = pd.read_json(path, lines=True)
        ppg = np.array(data['ppg'].to_list())
        for x in ppg:
            # One progress tick per row, after both extremes are recorded.
            min_.append(np.min(x))
            max_.append(np.max(x))
            bar()

# Generate TFRecords

In [None]:
# Wrap data_dir in a records.Dataset and pass it to generate_records, which
# returns three objects: unscaled data, scaled data and a scaler dictionary.
# NOTE(review): split_strategy presumably holds the train/val/test fractions,
# and scaler_path=None presumably means no saved scalers are loaded -- confirm
# both against records.generate_records.
ds = records.Dataset(data_dir)
data_unscaled, data_scaled, scaler_dict = records.generate_records(
    ds,
    data_dir=data_dir,
    samples_per_file=10_000,
    split_strategy=(0.7, 0.15, 0.15),
    scaler_path=None,
)