In [None]:
import json
import vitaldb
from tqdm.notebook import tqdm

# --- Configuration -----------------------------------------------------------
fs = 125  # sampling rate in Hz; load_case is given the interval 1 / fs
data_dir = '/home/cam/Documents/database_tools/vitaldb/lines/'

sig_names = ['ART', 'PLETH']
min_minutes = 10                 # cases must be longer than this to be kept
min_len = min_minutes * 60 * fs  # same threshold in samples (75000 at 125 Hz)
max_cases = 10                   # stop once this many cases have been written

case_ids = vitaldb.find_cases(sig_names)

print(f'Found {len(case_ids)} cases containing {sig_names}.')

# --- Export ------------------------------------------------------------------
# Each accepted case is written straight to disk as one JSON line. Building the
# whole file in a single string first would hold every signal in memory at once
# and re-copy the growing string on each append.
n = 0  # number of cases written so far
with open(data_dir + 'vitaldb.jsonlines', 'w') as f:
    for case in tqdm(case_ids):
        data = vitaldb.load_case(case, sig_names, 1 / fs)
        # Column 0 is treated as ABP (ART) and column 1 as PPG (PLETH).
        abp, ppg = data[:, 0], data[:, 1]

        if len(ppg) != len(abp):
            # Defensive check only: both signals are columns of the same array.
            print(f'The signal arrays for case {case} are different lengths.')
        elif len(ppg) > min_len:
            print(f'Appending data from case {case}.')
            f.write(json.dumps(dict(case_id=case, ppg=ppg.tolist(), abp=abp.tolist())) + '\n')
            n += 1
        else:
            print(f'Case {case} was less than 10 minutes.')

        if n == max_cases:
            break

In [None]:
import os
import ast
import json
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from database_tools.preprocessing import VitalSignalProcessor

# Run from the repository root.
repo_dir = '/home/cam/Documents/database_tools/'
os.chdir(repo_dir)

# Options passed to VitalSignalProcessor.run().
config = dict(
    low=0.5,                    # low bandpass frequency
    high=8.0,                   # high bandpass frequency
    sim=0.6,                    # similarity threshold
    df=0.2,                     # one sided frequency delta for SNR calculation
    snr_t=2.0,                  # SNR threshold
    hr_diff=1/6,                # maximum heart rate difference between ppg, abp
    f0_low=0.667,               # minimum valid HR in Hz
    f0_high=3.0,                # maximum valid HR in Hz
    abp_min_bounds=[20, 130],   # upper and lower threshold for DBP
    abp_max_bounds=[50, 225],   # upper and lower threshold for SBP
    windowsize=1,               # windowsize for rolling mean
    ma_perc=20,                 # multiplier for peak detection
    beat_sim=0.2,               # lower threshold for beat similarity
)

# NOTE(review): the download cell writes to .../vitaldb/lines/vitaldb.jsonlines,
# while this reads .../vitaldb/vitaldb.jsonlines -- confirm which path is intended.
processor = VitalSignalProcessor(
    data_dir='/home/cam/Documents/database_tools/vitaldb/vitaldb.jsonlines',
    win_len=256,
    fs=125,
)

# Write each processed window as one JSON line as soon as it is produced,
# rather than concatenating the whole output into a single string first.
outfile = '/home/cam/Documents/database_tools/vitaldb/lines/vitaldb_processed.jsonlines'
with open(outfile, 'w') as f:
    for case, ppg, abp in processor.run(config):
        f.write(json.dumps(dict(case=int(case), ppg=ppg.tolist(), abp=abp.tolist())) + '\n')

# Stats are saved only after run() has been fully consumed.
processor.save_stats(path='/home/cam/Documents/database_tools/vitaldb/vitaldb_stats.csv')

In [None]:
import pandas as pd

# Read the stats file from the same absolute location the processing cell saves
# it to. A relative '../vitaldb/...' path breaks on a top-to-bottom run because
# the processing cell has already changed the working directory to the repo root.
df = pd.read_csv('/home/cam/Documents/database_tools/vitaldb/vitaldb_stats.csv')

# Display only the rows whose 'valid' column is truthy.
df[df['valid']]

In [None]:
import os
from database_tools.tools import RecordsHandler

# Switch to the repository root so the relative 'vitaldb/' directory below
# is resolved from there.
repo_dir = '/home/cam/Documents/database_tools/'
os.chdir(repo_dir)

# Keyword options forwarded unchanged to RecordsHandler.generate_records().
record_options = dict(
    split_strategy=(0.05, 0.05, .9),
    max_samples=1326,
    scaler='MinMax',
)

worker = RecordsHandler(data_dir='vitaldb/')
worker.generate_records(**record_options)