In [None]:
from tqdm.notebook import tqdm
from database_tools.io.records import generate_record_paths, get_header_record, header_has_signals
from typing import List

def locate_valid_records(signals: List[str], min_length: int, n_records: int) -> List[str]:
    """Locate valid data segments among the 'adults' record paths.

    A patient record is examined only if its layout header passes
    ``header_has_signals`` for ``signals``. Within such a record, a segment
    is kept when it is not a missing-data placeholder ('~'), is longer than
    ``min_length`` samples, and its own header also passes
    ``header_has_signals``.

    Args:
        signals (List[str]): Signal names forwarded to ``header_has_signals``
            for both the layout header and every candidate segment header.
        min_length (int): Length threshold in samples. A segment must be
            strictly longer than this value to be kept.
        n_records (int): Soft limit on the number of segments collected. The
            search stops after the first patient that brings the running
            total above this value, so the result may contain more than
            ``n_records`` entries.

    Returns:
        List[str]: Segment paths of the form ``path + '/' + seg_name``, in
            the order in which they were found.
    """
    valid_segs = []
    for path in tqdm(generate_record_paths(name='adults')):

        # Patient layout header; the helper returns None if the file does
        # not exist.
        layout = get_header_record(path=path, record_type='layout')
        if layout is None:
            continue

        # Skip the patient early when the layout lacks the requested signals,
        # which avoids fetching the master and segment headers.
        if not header_has_signals(layout, signals):
            continue

        # Patient master header, listing segment names and lengths.
        master = get_header_record(path=path, record_type='data')
        if master is None:
            continue

        for seg_name, n_samples in zip(master.seg_name, master.seg_len):
            # '~' indicates data is missing; short segments are excluded too.
            if seg_name == '~' or not (n_samples > min_length):
                continue

            seg_path = path + '/' + seg_name
            hea = get_header_record(path=seg_path, record_type='data')
            if hea is None:
                continue

            # The segment itself must also carry the requested signals.
            if header_has_signals(hea, signals):
                valid_segs.append(seg_path)

        # NOTE(review): the limit is checked once per patient with a strict
        # '>', and the list is not truncated, so more than n_records paths
        # can be returned. Confirm whether an exact cap is intended.
        if len(valid_segs) > n_records:
            break
    return valid_segs

In [None]:
# Search parameters for this notebook.
# NOTE(review): presumably these are the arguments for locate_valid_records
# (signals, min_length, n_records); no call is present in the visible cells.
signals = ['PLETH', 'ABP']  # signal names to look for
min_length = 75000  # 10 min in samples (125 Hz): 10 * 60 * 125 = 75000
max_valid_segs = 100  # presumably the cap on collected segments; TODO confirm



In [None]:
from database_tools.io.utils import get_rdheader

# Ad-hoc check: read the layout header of record 3631880 so the notebook
# displays the returned value as the cell output.
# NOTE(review): presumably this fetches the header remotely; confirm against
# get_rdheader's implementation.
get_rdheader(pn_dir='mimic3wdb/1.0/36/3631880', record_name='3631880_layout')

In [None]:
# NOTE(review): `path` is not assigned at cell scope anywhere in the visible
# cells (it is only a loop variable inside locate_valid_records), so this cell
# relies on leftover kernel state and raises NameError on a fresh run.
path