# Imports and Setup

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from database_tools.tools.dataset import DatasetFactory
from database_tools.tools import DataEvaluator, records
from database_tools.processing.utils import build_data_directory

# Absolute location of the local database_tools checkout (machine-specific).
repo_dir = '/home/cam/Documents/database_tools/'

# Resolve the working directory for the 'mimic3' dataset stamped 20230407.
# NOTE(review): presumably returns a string path ending in '/', since later
# cells append file names to it with `+` -- confirm against the helper.
data_dir = build_data_directory(
    f'{repo_dir}data/',
    'mimic3',
    date='20230407',
)

# Build Database

In [None]:
# Create the dataset builder for the resolved data directory and execute it.
# NOTE(review): presumably writes its output under data_dir -- confirm.
bd = DatasetFactory(data_dir=data_dir)
bd.run()

# Generate TFRecords

In [None]:
# Wrap the data directory in a records.Dataset and generate the records from it.
# generate_records returns three objects: the unscaled data, the scaled data
# and the scalers that were used.
# NOTE(review): split_strategy is presumably the train/validation/test
# fractions; scaler_path=None presumably means no existing scaler is loaded
# -- confirm both against records.generate_records.
ds = records.Dataset(data_dir)
data_unscaled, data_scaled, scaler_dict = records.generate_records(
    ds,
    data_dir=data_dir,
    split_strategy=(
        0.7,
        0.15,
        0.15,
    ),
    samples_per_file=10_000,
    scaler_path=None,
)

In [None]:
# Pull the training split of each scaled signal (indexed by signal, then split).
ppg_train, abp_train = (
    data_scaled[signal]['train'] for signal in ('ppg', 'abp')
)

In [None]:
# Overlay one training sample of the scaled PPG and ABP signals.
fig = go.FigureWidget()

# Row of the training arrays to display.
i = 570

# The PPG row is multiplied by a constant before plotting, apparently so it is
# visible on the same axis as the ABP row.
# NOTE(review): the test-split plot uses a factor of 100; confirm that 101 is
# intentional here.
# Both traces are labelled so the legend identifies them (the PPG trace was
# previously shown under plotly's default "trace 0" name).
fig.add_scatter(y=ppg_train[i, :]*101, name='ppg')
fig.add_scatter(y=abp_train[i, :], name='abp')

In [None]:
# Pull the test split of each scaled signal (indexed by signal, then split).
ppg_test, abp_test = (
    data_scaled[signal]['test'] for signal in ('ppg', 'abp')
)

In [None]:
# Plot row 10 of the scaled PPG test split, multiplied by 100 for display.
fig = go.FigureWidget()
fig.add_trace(go.Scatter(y=ppg_test[10, :]*100))

# Evaluate Dataset

In [None]:
# Let pandas display up to 200 rows when rendering frames in this notebook.
pd.set_option('display.max_rows', 200)

# Load the per-sample statistics table and keep only the rows whose 'valid'
# column is true before handing them to the evaluator.
stats = pd.read_csv(data_dir + 'mimic3_stats.csv')
valid_mask = stats['valid']
de = DataEvaluator(stats.loc[valid_mask])

# run() returns a mapping; show which entries it produced.
plots = de.run()
plots.keys()

In [None]:
# Index of the jsonlines file to inspect; zero-padded to three digits in the name.
i = 0
data = pd.read_json(
    data_dir + f'data/lines/mimic3_{i:03d}.jsonlines',
    lines=True,
)

# Convert the per-row 'ppg' and 'abp' lists into NumPy arrays.
ppg, abp = (np.array(data[column].to_list()) for column in ('ppg', 'abp'))

In [None]:
# Plot row 10 of the ABP array loaded from the jsonlines file.
fig = go.FigureWidget()
fig.add_trace(go.Scatter(y=abp[10, :]))