In [None]:
from database_tools.io.records import generate_record_paths, get_header_record, header_has_signals
from typing import List
from alive_progress import alive_bar

def locate_valid_records(signals: List[str], min_length: int, n_segments: int, shuffle: bool = True) -> List[str]:
    """Locate valid data segments.

    Records are scanned in the order produced by ``generate_record_paths``.
    A segment is accepted when the record's layout header and the segment's
    own header both pass ``header_has_signals`` for ``signals``, and the
    segment holds more than ``min_length`` samples.

    Args:
        signals (List[str]): One or more of ['PLETH', 'ABP', ...]  TODO: add other signals
        min_length (int): Segments must contain strictly more samples than this.
        n_segments (int): Maximum number of segments to find.
        shuffle (bool): If True records list is shuffled.

    Returns:
        valid_segs (List[str]): Paths of the accepted segments in the form
            '<record path>/<segment name>'. Holds at most ``n_segments``
            entries, fewer if the records run out first, and is empty when
            ``n_segments`` is not positive.
    """
    valid_segs: List[str] = []

    # Nothing was requested, so do not touch the database at all.
    if n_segments <= 0:
        return valid_segs

    with alive_bar(total=n_segments, bar='brackets', force_tty=True) as bar:
        for path in generate_record_paths(name='adults', shuffle=shuffle):

            # Get patient layout header (None if the file does not exist).
            layout = get_header_record(path=path, record_type='layout')
            if layout is None:
                continue

            # Skip the whole record if its layout lacks the provided signals.
            if not header_has_signals(layout, signals):
                continue

            # Get patient master header, which lists the segments.
            master = get_header_record(path=path, record_type='data')
            if master is None:
                continue

            for seg_name, n_samples in zip(master.seg_name, master.seg_len):

                # '~' indicates data is missing; also drop segments that are too short.
                if seg_name == '~' or n_samples <= min_length:
                    continue

                seg_path = path + '/' + seg_name

                # Get segment header.
                hea = get_header_record(path=seg_path, record_type='data')
                if hea is None:
                    continue

                # The segment itself must also carry the provided signals.
                if not header_has_signals(hea, signals):
                    continue

                valid_segs.append(seg_path)
                bar()  # advance the loading bar once per accepted segment

                # Stop at exactly n_segments so the result matches the bar total.
                if len(valid_segs) >= n_segments:
                    return valid_segs

    # Records were exhausted before n_segments were found: return what we have
    # instead of implicitly returning None.
    return valid_segs

In [None]:
# Search criteria passed to locate_valid_records.
signals = ['PLETH', 'ABP']
min_length = 10 * 60 * 125  # 10 min in samples (125 Hz)
n_segments = 50

valid_segs = locate_valid_records(
    signals=signals,
    min_length=min_length,
    n_segments=n_segments,
)

In [None]:
# Display how many segment paths the search above collected.
len(valid_segs)