In [14]:
# Local paths (absolute, machine-specific Windows paths) - modify before running on another machine.
BASE_PATIENTS_DIR   = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Patients ePatch data" # Root directory containing patient data folders
BASE_ANNOTATION_DIR = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed" # Root directory containing annotation files
# OUTPUT_ROOT has two alternatives; keep exactly one assignment active.
# Swap the comment marker to target the "NonResponders" output folder instead of "Responders".
#OUTPUT_ROOT         = r"E:\ML algoritme tl anfaldsdetektion vha HRV\LabView-Results_Excluded_seizures_removed\NonResponders" # Root directory for output files
OUTPUT_ROOT         = r"E:\ML algoritme tl anfaldsdetektion vha HRV\LabView-Results_Excluded_seizures_removed\Responders" # Root directory for output files

In [15]:
# Seizure extraction utilities
# - Lightweight helpers to map TDMS file times to sample indices, slice windows,
#   save CSVs and produce simple ECG plots (absolute or relative time axis).
# - Assumptions:
#     * TDMS timestamps that are naive are treated as UTC if TDMS_NAIVE_IS_UTC is True.
#     * TARGET_TZ is Europe/Copenhagen and used when converting to local naive datetimes.
# Notes:
#     * TDMS loading (tdms_load_ecg) and annotation parsing (Excel/CSV/LVM) helpers
#       are defined further down in this same cell.
#     * This cell contains configuration and helper definitions only.

from nptdms import TdmsFile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import json
import re
import glob
import warnings
from pathlib import Path
import datetime as dt
import arrow
from zoneinfo import ZoneInfo  # py>=3.9
import gc
import psutil
from typing import Optional, Tuple

# Processing configuration (adjust in the notebook as needed)
# TARGET_TZ: zone used by get_tdms_start_local_naive / to_naive_local_dt when producing naive local times.
TARGET_TZ = ZoneInfo("Europe/Copenhagen")
# TDMS_NAIVE_IS_UTC: when True, tz-less TDMS start times are interpreted as UTC (otherwise as TARGET_TZ).
TDMS_NAIVE_IS_UTC = True

# Visualization / extraction parameters
X_AXIS_MODE = "absolute"     # "absolute" or "relative"
# NOTE(review): TIME_TZ is not read by any helper in this cell - confirm it is used elsewhere.
TIME_TZ = "local"
PAD_MIN = 2                  # padding (min) added around seizure windows
NONSEIZURE_OFFSET_MIN = 20   # offset (min) for non-seizure window selection
MAX_PLOT_POINTS = 200_000    # downsample threshold when plotting (default of _thin_for_plot)

# --------------------------
# Helper utilities
# --------------------------

def slice_window(signal_length, fs, i_start, i_end, pad_min):
    """
    Widen an event's sample range by a symmetric padding and clip it to the recording.

    Parameters
    - signal_length: total number of samples in the recording
    - fs: sampling frequency in Hz
    - i_start, i_end: event sample indices (i_end is exclusive)
    - pad_min: padding in minutes, applied before i_start and after i_end

    Returns
    - (i0, i1): half-open range [i0, i1) with i0 >= 0 and i1 <= signal_length
    """
    # Padding expressed in samples, rounded to the nearest whole sample.
    pad_samples = int(round(pad_min * 60 * fs))
    lower = i_start - pad_samples
    upper = i_end + pad_samples
    # Clip to the valid sample range of the recording.
    clipped_lower = lower if lower > 0 else 0
    clipped_upper = upper if upper < signal_length else signal_length
    return clipped_lower, clipped_upper

def save_csv(path, t_rel_s, ecg):
    """
    Write a two-column CSV ("t_rel_s", "ecg") without an index column, UTF-8 encoded.

    - path: destination file path
    - t_rel_s: 1D array-like of times in seconds relative to the segment start
    - ecg: 1D array-like of ECG samples (same length as t_rel_s)
    """
    columns = {
        "t_rel_s": np.asarray(t_rel_s),
        "ecg": np.asarray(ecg),
    }
    frame = pd.DataFrame(columns)
    frame.to_csv(path, index=False, encoding="utf-8")

def _format_dt(py_dt):
    """Format datetime for titles/filenames."""
    return py_dt.strftime("%Y-%m-%d %H:%M:%S")

def make_time_axis(ax):
    """
    Set up the x-axis of `ax` for datetime data: automatic date tick locator,
    HH:MM:SS tick labels, labels rotated 15 degrees and right-aligned.
    """
    x_axis = ax.xaxis
    x_axis.set_major_locator(mdates.AutoDateLocator())
    x_axis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(15)
        tick_label.set_horizontalalignment('right')

def build_segment_datetimes(start_local_dt, start_index, end_index, wf_inc):
    """
    Translate a sample range into wall-clock datetimes.

    - start_local_dt: datetime of sample index 0 of the file
    - start_index: first sample of the segment
    - end_index: exclusive end of the segment; the returned end time is that of
      sample end_index - 1
    - wf_inc: seconds per sample (1 / fs when fs is constant)

    Returns (segment_start, segment_end).
    """
    first_offset = dt.timedelta(seconds=start_index * wf_inc)
    last_offset = dt.timedelta(seconds=(end_index - 1) * wf_inc)
    return start_local_dt + first_offset, start_local_dt + last_offset

def plot_seizure(out_png, y, fs, wf_inc, tdms_start_local, seg_i0, seg_i1, st_idx, et_idx,
                 title_prefix="", x_axis_mode="absolute"):
    """
    Plot an ECG segment with seizure start/end markers and save it as a PNG.

    Parameters
    - out_png: output image path
    - y: ECG samples of the plotted segment (expected length: seg_i1 - seg_i0)
    - fs: sampling frequency in Hz (used for the relative time axis)
    - wf_inc: seconds per sample (usually 1.0 / fs; used for absolute times)
    - tdms_start_local: naive local datetime of sample index 0 of the file
    - seg_i0, seg_i1: segment sample range [seg_i0, seg_i1) within the file
    - st_idx, et_idx: seizure start/end sample indices (absolute within the file)
    - title_prefix: text placed in front of the window/event summary in the title
    - x_axis_mode: "absolute" shows clock times; any other value shows seconds
      relative to the segment start

    The figure is always closed, even when plotting or saving raises, so repeated
    calls in a loop do not accumulate open figures.
    """
    n = len(y)

    # Absolute datetimes for the title and (in absolute mode) for the axis/markers
    seg_start, seg_end = build_segment_datetimes(tdms_start_local, seg_i0, seg_i1, wf_inc)
    event_start = tdms_start_local + dt.timedelta(seconds=st_idx * wf_inc)
    event_end = tdms_start_local + dt.timedelta(seconds=et_idx * wf_inc)

    fig, ax = plt.subplots(figsize=(14, 4))
    try:
        if x_axis_mode == "absolute":
            # One datetime per sample (wf_inc is used in case fs is non-integer)
            datetimes_py = [seg_start + dt.timedelta(seconds=i * wf_inc) for i in range(n)]
            ax.plot(datetimes_py, y, label="ECG")
            ax.axvline(event_start, linestyle="--", color="C1", label="Seizure start")
            ax.axvline(event_end, linestyle="--", color="C2", label="Seizure end")
            make_time_axis(ax)
            ax.set_xlabel("Time (HH:MM:SS)")
        else:
            # Seconds from segment start for samples and seizure markers
            t_rel = np.arange(n) / fs
            start_rel_s = (st_idx - seg_i0) / fs
            end_rel_s = (et_idx - seg_i0) / fs
            ax.plot(t_rel, y, label="ECG")
            ax.axvline(start_rel_s, linestyle="--", color="C1", label="Seizure start")
            ax.axvline(end_rel_s, linestyle="--", color="C2", label="Seizure end")
            ax.set_xlabel("Time (s) relative to segment start")

        ax.set_ylabel("Amplitude")
        # \u2192 is the right-arrow character; written as an escape so the title
        # survives notebooks/files saved with a mismatched encoding.
        ax.set_title(
            f"{title_prefix}  |  Window: {_format_dt(seg_start)} \u2192 {_format_dt(seg_end)}  "
            f"(Event: {_format_dt(event_start)} \u2192 {_format_dt(event_end)})"
        )
        ax.legend()
        fig.tight_layout()
        fig.savefig(out_png, dpi=150)
    finally:
        # Release the figure explicitly; pyplot keeps figures alive until closed.
        plt.close(fig)

def _make_time_axis(ax):
    """
    Configure a matplotlib axis for a time-series x-axis:
      - automatic locator for nice tick spacing
      - HH:MM:SS formatting
      - slight rotation for readability

    Kept as a private alias of make_time_axis (defined earlier in this cell),
    which performs exactly these steps, so the two cannot drift apart.
    """
    make_time_axis(ax)


def _seg_times(tdms_start_naive, i0, i1, wf_inc):
    """
    Return start and end datetimes for a waveform segment.

    tdms_start_naive: naive (tz-unaware) datetime for the TDMS recording start
    i0, i1: sample indices (i1 is exclusive in downstream logic, so end uses i1-1)
    wf_inc: seconds per sample
    """
    seg_start = tdms_start_naive + dt.timedelta(seconds=i0 * wf_inc)
    seg_end = tdms_start_naive + dt.timedelta(seconds=(i1 - 1) * wf_inc)
    return seg_start, seg_end


def _thin_for_plot(y, max_points=MAX_PLOT_POINTS):
    """
    Uniformly down-sample a long 1-D array for plotting.

    Returns (indices, values):
      - when len(y) <= max_points: (arange(len(y)), y) with y returned unchanged
      - otherwise: every step-th sample, step = ceil(len(y) / max_points);
        y is indexed with an integer index array, so it must support that
        (e.g. a numpy array).
    """
    total = len(y)
    if total > max_points:
        stride = int(np.ceil(total / max_points))
        keep = np.arange(0, total, stride, dtype=int)
        return keep, y[keep]
    return np.arange(total), y


def to_naive_local(dt_like):
    """
    Convert various datetime-like inputs to a naive (tz-unaware) local datetime.

    Accepts: pandas.Timestamp, datetime.datetime, arrow.Arrow, and anything
    pandas.to_datetime can parse (e.g. strings).
    Returns:
      - datetime.datetime without tzinfo; tz-aware inputs are first converted to
        the *system* local timezone (not TARGET_TZ)
      - None for None, NaT, or input that cannot be interpreted

    NOTE: a second definition of to_naive_local appears later in this cell and
    replaces this one when the cell runs.
    """
    if dt_like is None or dt_like is pd.NaT:
        return None

    # pandas.Timestamp subclasses datetime.datetime but its astimezone() requires an
    # explicit tz argument, so unwrap it to a plain datetime before the generic branch.
    if isinstance(dt_like, pd.Timestamp):
        dt_like = dt_like.to_pydatetime(warn=False)

    # Python datetime (including unwrapped Timestamps)
    if isinstance(dt_like, dt.datetime):
        if dt_like.tzinfo is not None:
            # convert to system local timezone then drop tzinfo
            return dt_like.astimezone().replace(tzinfo=None)
        return dt_like

    # Arrow object
    if isinstance(dt_like, arrow.Arrow):
        return dt_like.to("local").naive

    # Fallback: tolerant pandas parsing (strings etc.)
    try:
        ts = pd.to_datetime(dt_like, errors="coerce")
    except (TypeError, ValueError):
        return None
    if not isinstance(ts, pd.Timestamp):
        # NaT (unparsable) or a list-like result: nothing sensible to return
        return None
    py = ts.to_pydatetime(warn=False)
    if py.tzinfo is not None:
        return py.astimezone().replace(tzinfo=None)
    return py


def _time_to_timedelta(t):
    """
    Helper: convert a time-like object with hour/minute/second/microsecond
    attributes into a pandas Timedelta.
    """
    return (
        pd.to_timedelta(t.hour, unit="h")
        + pd.to_timedelta(t.minute, unit="m")
        + pd.to_timedelta(t.second, unit="s")
        + pd.to_timedelta(t.microsecond, unit="us")
    )


def _parse_time_cell(date_cell, time_cell):
    """
    Combine a date cell and a time cell into a single pandas.Timestamp (local).
    Handles:
      - Excel float times (fraction of day)
      - strings like "12:30:00"
      - pandas.Timestamp, datetime.datetime, datetime.time
      - NaT / NaN
    Returns pd.NaT when parsing fails.

    Note: date is normalized (time-of-day zeroed) and time is added as a timedelta.
    """
    if pd.isna(time_cell) or pd.isna(date_cell):
        return pd.NaT

    # Normalize date (drop time component)
    date_ts = pd.to_datetime(date_cell, errors="coerce", dayfirst=True)
    if pd.isna(date_ts):
        return pd.NaT
    date_norm = date_ts.normalize()  # pandas.Timestamp at 00:00:00

    # Excel float: fraction of a day
    if isinstance(time_cell, (int, float, np.integer, np.floating)):
        return date_norm + pd.to_timedelta(float(time_cell), unit="D")

    # pandas.Timestamp
    if isinstance(time_cell, pd.Timestamp):
        return date_norm + _time_to_timedelta(time_cell)

    # datetime.datetime
    if isinstance(time_cell, dt.datetime):
        return date_norm + _time_to_timedelta(time_cell)

    # datetime.time -> convert to Timedelta relative to date_norm
    if isinstance(time_cell, dt.time):
        # use attributes of time object
        return date_norm + pd.to_timedelta(time_cell.hour, unit="h") + pd.to_timedelta(
            time_cell.minute, unit="m"
        ) + pd.to_timedelta(time_cell.second, unit="s") + pd.to_timedelta(
            time_cell.microsecond, unit="us"
        )

    # Strings and other types: attempt parsing
    s = str(time_cell).strip()
    if not s or s.lower() in {"nan", "none", "na"}:
        return pd.NaT
    parsed = pd.to_datetime(s, errors="coerce", dayfirst=True)
    if pd.isna(parsed):
        return pd.NaT

    return date_norm + _time_to_timedelta(parsed)
def _seconds_since_midnight(ts):
    """
    Return seconds since midnight for a (pandas) timestamp-like object.
    Returns np.nan for NA inputs.
    """
    if pd.isna(ts):
        return np.nan
    return ts.hour * 3600 + ts.minute * 60 + ts.second + ts.microsecond / 1e6


def load_seizure_annotations_file(path_excel):
    """
    Load one Excel seizure-annotation file whose table header is on row 7 (header=6).

    Columns are located by case-insensitive substring match on the
    whitespace-stripped header text. Danish names are tried first, English
    names as a fallback:
      - Anfald nr            / Seizure number
      - Dato                 / Date
      - Anfaldsstart Klinisk / Seizurestart clinic
      - Anfaldsstart EEG     / Seizurestart EEG
      - Anfaldstop Klinisk   / Seizureend clinic
      - Anfaldstop EEG       / Seizureend EEG
      - Anfaldstype          / Seizure type
      - Evt. bemærkninger    / note / other

    Returns a DataFrame (same row index as the sheet) with columns:
      seizure_number, date (midnight), start_clinic, start_eeg, end_clinic, end_eeg,
      seizure_type, other,
      <start|end>_<clinic|eeg>_seconds (seconds since midnight),
      <start|end>_<clinic|eeg>_hour (hour of day),
      source_file (base name of path_excel).
    Time columns that are absent from the sheet are filled with NaT.
    """
    raw = pd.read_excel(path_excel, header=6)
    # Header cells often carry stray whitespace (and may be non-strings); normalise on a copy.
    df = raw.rename(columns={c: str(c).strip() for c in raw.columns})

    def find_col(*keys):
        """Return the first column containing any key (keys tried in order), else None."""
        for key in keys:
            key_l = key.lower()
            for c in df.columns:
                if key_l in c.lower():
                    return c
        return None

    # The bare 'seizure' fallback is kept last because it matches any seizure-related header.
    num_col = find_col('Anfald nr', 'Seizure number', 'seizure')
    date_col = find_col('Dato', 'Date')
    s_clin_col = find_col('Anfaldsstart Klinisk', 'Seizurestart clinic')
    s_eeg_col = find_col('Anfaldsstart EEG', 'Seizurestart EEG')
    e_clin_col = find_col('Anfaldstop Klinisk', 'Seizureend clinic')
    e_eeg_col = find_col('Anfaldstop EEG', 'Seizureend EEG')
    type_col = find_col('Anfaldstype', 'Seizure type')
    # \u00e6 is the Danish letter ae-ligature; escaped so the literal cannot be mis-encoded.
    other_col = find_col('Evt. bem\u00e6rkninger', 'note', 'other')

    res = pd.DataFrame(index=df.index)

    # Seizure number: if not present use the row index + 1
    res['seizure_number'] = df[num_col] if num_col else (df.index + 1)

    # Normalize date column to midnight timestamps if present, else NaT
    if date_col:
        res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()
    else:
        res['date'] = pd.NaT

    date_values = df[date_col].tolist() if date_col else [None] * len(df)

    def combine(time_col):
        """Date + time-of-day for one time column as a datetime64 Series (NaT when missing)."""
        if not time_col:
            return pd.Series(pd.NaT, index=df.index, dtype="datetime64[ns]")
        stamps = [
            _parse_time_cell(d, t)
            for d, t in zip(date_values, df[time_col].tolist())
        ]
        # Force a datetime dtype (also for empty/all-NaT columns) so `.dt` works below.
        return pd.to_datetime(pd.Series(stamps, index=df.index, dtype=object), errors="coerce")

    res['start_clinic'] = combine(s_clin_col)
    res['start_eeg'] = combine(s_eeg_col)
    res['end_clinic'] = combine(e_clin_col)
    res['end_eeg'] = combine(e_eeg_col)

    # Copy type/other columns if present, otherwise fill with None
    res['seizure_type'] = df[type_col] if type_col else None
    res['other'] = df[other_col] if other_col else None

    # Helper columns for statistics: seconds since midnight and hour of day
    for prefix in ['start_clinic', 'start_eeg', 'end_clinic', 'end_eeg']:
        res[f'{prefix}_seconds'] = res[prefix].apply(_seconds_since_midnight)
        res[f'{prefix}_hour'] = res[prefix].dt.hour

    # Keep original source filename for traceability
    res['source_file'] = os.path.basename(path_excel)

    return res


def to_naive_local_dt(v):
    """
    Parse a timestamp-like value into a naive (tz-unaware) datetime.

    - Parsing uses pandas.to_datetime with dayfirst=True.
    - A tz-aware result is converted to TARGET_TZ before tzinfo is dropped;
      a tz-less result is returned as parsed.
    - Unparsable or NA values give None.
    """
    parsed = pd.to_datetime(v, errors="coerce", dayfirst=True, utc=False)
    if pd.isna(parsed):
        return None
    as_py = parsed.to_pydatetime()
    if not as_py.tzinfo:
        # Already naive: taken to be local time
        return as_py
    # Unexpected tz-aware cell: express it in TARGET_TZ, then strip the zone
    return as_py.astimezone(TARGET_TZ).replace(tzinfo=None)


def events_from_annotation_df(df, prefer="clinic"):
    """
    Build a start-sorted list of (start, end) naive local datetimes from a frame
    produced by load_seizure_annotations_file(...).

    prefer: "clinic" or "eeg" - selects the start_<prefer>/end_<prefer> columns;
    if such a column does not exist the clinic column is used instead.
    Rows with a missing start or end, or with end <= start, are dropped.
    """
    start_series = df.get(f"start_{prefer}", df.get("start_clinic"))
    end_series = df.get(f"end_{prefer}", df.get("end_clinic"))

    intervals = []
    for raw_start, raw_end in zip(start_series, end_series):
        if pd.isna(raw_start) or pd.isna(raw_end):
            continue
        begin = to_naive_local_dt(raw_start)
        finish = to_naive_local_dt(raw_end)
        if not (begin and finish):
            continue
        # Keep only proper intervals (end strictly after start)
        if finish > begin:
            intervals.append((begin, finish))

    return sorted(intervals, key=lambda pair: pair[0])


def ensure_dir(p):
    """Create directory p, including missing parents; do nothing if it already exists."""
    target = Path(p)
    target.mkdir(parents=True, exist_ok=True)

def find_ecg_channel(td) -> Optional[object]:
    """
    Locate the ECG channel in a TDMS file by scanning every group in order.

    A channel matches when its stripped name is exactly (case-insensitive)
    'EKG', 'ECG', 'Lead 1' or 'Lead I' (whitespace between 'Lead' and the
    number is optional). If nothing matches, the very first channel
    encountered is returned; None when the file has no channels at all.
    """
    ecg_name = re.compile(r"^(ekg|ecg|lead\s*1|lead\s*i)$", re.IGNORECASE)

    fallback = None
    all_channels = (channel for group in td.groups() for channel in group.channels())
    for channel in all_channels:
        if fallback is None:
            fallback = channel  # remembered in case no name matches
        label = (channel.name or "").strip()
        if ecg_name.match(label):
            return channel
    return fallback

def get_sampling_increment(ch) -> float:
    """
    Get the sample increment (seconds per sample) from the channel properties.

    Properties are tried in this order; the first one holding a finite value > 0 wins:
      - 'wf_increment'      (sec/sample)
      - 'wf_sampling_rate'  (Hz) -> 1/fs
      - 'sampling_rate'     (Hz) -> 1/fs
      - 'wf_xscale'         (sec/sample, last resort)
    Missing, non-numeric, non-finite or non-positive values are skipped, so an
    invalid earlier property never hides a valid later one.

    Raises RuntimeError if no valid value is found.
    """
    props = getattr(ch, "properties", {}) or {}

    def _positive_float(raw):
        """Return raw as a finite float > 0, or None when missing/invalid."""
        if raw is None:
            return None
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return None
        return value if np.isfinite(value) and value > 0 else None

    # (property name, True when the property is a rate in Hz that must be inverted)
    candidates = (
        ("wf_increment", False),
        ("wf_sampling_rate", True),
        ("sampling_rate", True),
        ("wf_xscale", False),
    )
    for key, is_rate in candidates:
        value = _positive_float(props.get(key, None))
        if value is not None:
            return 1.0 / value if is_rate else value

    raise RuntimeError("Cannot determine sampling increment (wf_increment/sampling_rate missing or invalid).")

def to_naive_local(dt_like) -> Optional[dt.datetime]:
    """
    Convert various time-like inputs to a naive local datetime (tzinfo=None).

    Accepts: pandas.Timestamp, datetime, arrow.Arrow, pandas-parsable strings.
    Timezone-aware inputs are converted to the *system* local timezone (not
    TARGET_TZ) before tzinfo is dropped; naive inputs are returned unchanged.
    Returns None for None, NaT, or when conversion fails.
    """
    if dt_like is None or dt_like is pd.NaT:
        return None

    # pandas.Timestamp subclasses datetime but its astimezone() needs an explicit tz,
    # so unwrap it to a plain datetime before the generic datetime branch.
    if isinstance(dt_like, pd.Timestamp):
        dt_like = dt_like.to_pydatetime(warn=False)

    if isinstance(dt_like, dt.datetime):
        # if aware -> convert to local time and drop tzinfo
        if dt_like.tzinfo is not None:
            return dt_like.astimezone().replace(tzinfo=None)
        return dt_like

    if isinstance(dt_like, arrow.Arrow):
        return dt_like.to('local').naive

    try:
        ts = pd.to_datetime(dt_like, errors='coerce')
    except (TypeError, ValueError):
        return None
    if not isinstance(ts, pd.Timestamp):
        # NaT (unparsable) or a list-like result
        return None
    py = ts.to_pydatetime(warn=False)
    return py.astimezone().replace(tzinfo=None) if py.tzinfo is not None else py

def get_tdms_start_local_naive(ch) -> Optional[dt.datetime]:
    """
    Extract the recording start time from the channel properties and return it
    as a naive local datetime.

    - Candidate property names are tried in priority order; an exact key match is
      preferred, otherwise the name is matched case-insensitively.
    - Boolean and plain numeric property values are skipped: they are flags or raw
      counters, and pandas would otherwise read them as nanoseconds since 1970.
    - A naive timestamp is taken to be UTC when TDMS_NAIVE_IS_UTC is True,
      otherwise to be in TARGET_TZ.
    - The result is converted to TARGET_TZ and tzinfo is removed.

    Returns None if no candidate property holds a parsable timestamp.
    """
    # 'ni_expisrelativetime' is intentionally not listed: it is a flag, not a timestamp.
    candidates = [
        "wf_start_time", "wf_start", "ni_exptimestamp",
        "NI_ExpStartTime", "NI_T0"
    ]
    props = getattr(ch, "properties", {}) or {}

    # Case-insensitive view of the properties (first occurrence wins)
    by_lower = {}
    for name, value in props.items():
        by_lower.setdefault(str(name).lower(), value)

    for key in candidates:
        if key in props:
            val = props[key]
        elif key.lower() in by_lower:
            val = by_lower[key.lower()]
        else:
            continue

        if val is None or isinstance(val, (bool, int, float, np.bool_, np.number)):
            continue

        # Use pandas for tolerant parsing; do not force UTC here
        try:
            ts = pd.to_datetime(val, errors="coerce", utc=False)
        except (TypeError, ValueError, OverflowError):
            continue
        if not isinstance(ts, pd.Timestamp):
            # NaT (unparsable) or a non-scalar result
            continue

        py = ts.to_pydatetime(warn=False)

        # If naive: assume UTC or TARGET_TZ depending on global setting
        if py.tzinfo is None:
            if TDMS_NAIVE_IS_UTC:
                py = py.replace(tzinfo=dt.timezone.utc)
            else:
                py = py.replace(tzinfo=TARGET_TZ)

        # Convert to local zone (TARGET_TZ) and drop tzinfo -> naive local time
        return py.astimezone(TARGET_TZ).replace(tzinfo=None)

    return None

def tdms_load_ecg(tdms_path: str) -> Tuple[np.ndarray, float, Optional[dt.datetime], dict]:
    """
    Load the ECG/EKG channel from a TDMS file.

    Returns (data_array, fs, start_local_naive, meta):
      - data_array: channel samples as a float numpy array
      - fs: sampling frequency in Hz (1 / sampling increment)
      - start_local_naive: naive local start datetime, or None if unavailable
      - meta: dict with keys "channel", "group", "fs", "wf_increment",
        "n_samples", "start_local_iso" ('YYYY-MM-DD HH:MM:SS' string or None)

    Raises RuntimeError if the file has no channels or the sampling increment
    cannot be determined (see get_sampling_increment).
    """
    # Suppress noisy nptdms warnings during reading
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        td = TdmsFile.read(tdms_path)

    ecg_ch = find_ecg_channel(td)
    if ecg_ch is None:
        raise RuntimeError("No channels found in the TDMS file.")

    # Data as float numpy array
    x = np.asarray(ecg_ch.data, dtype=float)

    # Sampling interval and frequency
    wf_inc = get_sampling_increment(ecg_ch)
    fs = 1.0 / wf_inc

    # Start time as naive local datetime (may be None)
    start_local_naive = get_tdms_start_local_naive(ecg_ch)

    # npTDMS channels expose the owning group as `group_name`; fall back to a
    # `.group.name` attribute chain for channel-like objects that provide that instead.
    group_name = getattr(ecg_ch, "group_name", None)
    if group_name is None:
        group_name = getattr(getattr(ecg_ch, "group", None), "name", None)

    meta = {
        "channel": getattr(ecg_ch, "name", None),
        "group": group_name,
        "fs": fs,
        "wf_increment": wf_inc,
        "n_samples": int(x.shape[0]),
        "start_local_iso": start_local_naive.strftime("%Y-%m-%d %H:%M:%S") if start_local_naive else None,
    }
    return x, fs, start_local_naive, meta

from typing import Optional, Tuple

def extract_ids_from_relpath(rel_tdms: str) -> Tuple[Optional[int], Optional[int]]:
    """
    Derive numeric (patient_id, recording_id) from a relative TDMS path.

    Sources, in priority order (later steps only fill values still missing):
      1) "patient <N>" / "recording <M>" anywhere in the path (case-insensitive;
         optional whitespace between word and number).
      2) File name of the form Patient<sep><p><sep><r>.tdms, where <sep> is any run
         of '_', '-' or whitespace (the first one may be empty), e.g.
         Patient_5_2.tdms, Patient-5-2.tdms, Patient5-2.tdms.
      3) File name of the form Patient<sep><p>.tdms -> patient only.

    Either element of the result is None when it cannot be determined.
    """
    normalized = rel_tdms.replace("\\", "/")
    basename = Path(normalized).name

    def _first_int(pattern, text, flags=0):
        # First capture group of the first match as int, or None without a match.
        found = re.search(pattern, text, flags)
        return int(found.group(1)) if found else None

    # 1) Folder-style markers anywhere in the path
    patient_id = _first_int(r"(?<!\d)\bpatient\s*(\d+)\b", normalized, re.IGNORECASE)
    rec_id = _first_int(r"(?<!\d)\brecording\s*(\d+)\b", normalized, re.IGNORECASE)

    # 2) File name carrying both numbers
    if patient_id is None or rec_id is None:
        both = re.search(r"(?i)^patient[_\s-]*?(\d+)[_\s-]+(\d+)\.tdms$", basename)
        if both:
            patient_id = int(both.group(1)) if patient_id is None else patient_id
            rec_id = int(both.group(2)) if rec_id is None else rec_id

    # 3) File name carrying only the patient number
    if patient_id is None:
        patient_id = _first_int(r"(?i)^patient[_\s-]*?(\d+)\.tdms$", basename)

    return patient_id, rec_id


def discover_tdms_for_patient(
    base_dir: str,
    patient_selector: Optional[str] = None,
    patient_id: Optional[int] = None,
):
    """
    Generator yielding (relative_path, absolute_path) for the TDMS files of ONE patient folder.

    - Top-level directories of base_dir are visited in case-insensitive name order.
    - patient_selector: if given, only folders whose name contains it
      (case-insensitive) are considered.
    - patient_id: if given, only folders whose name contains 'patient <id>'
      (case-insensitive, optional whitespace) are considered.
    - The first considered folder that contains *.tdms files (searched recursively)
      is used: its files are yielded sorted by recording id (files without one
      last), then by lower-cased relative path, and the generator ends.
    - If a selector or id was given and the first matching folder holds no TDMS
      files, the generator ends without yielding anything.

    Relative paths are relative to base_dir.
    """
    base_dir = str(base_dir)
    id_pattern = rf"(?<!\d)\bpatient\s*{patient_id}\b"
    specific_request = bool(patient_selector) or patient_id is not None

    def _by_recording(pair):
        # Numeric recording id first (unknown ids sort last), then path.
        rel_path = pair[0]
        rec = extract_ids_from_relpath(rel_path)[1]
        return (999999 if rec is None else rec, rel_path.lower())

    for entry in sorted(os.listdir(base_dir), key=str.lower):
        folder = os.path.join(base_dir, entry)
        if not os.path.isdir(folder):
            continue

        # Folder-name filters
        if patient_selector and patient_selector.lower() not in entry.lower():
            continue
        if patient_id is not None and not re.search(id_pattern, entry, re.IGNORECASE):
            continue

        # Recursive collection; '.tdms_index' files never end with '.tdms',
        # so the suffix test alone excludes them.
        matches = []
        for root, _, files in os.walk(folder):
            for fname in files:
                if fname.lower().endswith(".tdms"):
                    full = os.path.join(root, fname)
                    matches.append((os.path.relpath(full, base_dir), full))
        matches.sort(key=_by_recording)

        if matches:
            # Only one patient folder is processed per call.
            yield from matches
            return

        # A specifically requested patient without TDMS files ends the scan.
        if specific_request:
            return

def map_cols(df, starts_list, ends_list):
    """
    Resolve start/end column names in df from two alias lists.

    Matching is case-insensitive on the full column name; aliases are tried in
    list order and the first hit wins. Returns (start_col, end_col) using the
    original column spelling, with None for a side that has no match.
    """
    by_lower = {name.lower(): name for name in df.columns}

    def _first_hit(aliases):
        for alias in aliases:
            key = alias.lower()
            if key in by_lower:
                return by_lower[key]
        return None

    return _first_hit(starts_list), _first_hit(ends_list)

# Accepted header spellings for the seizure start column (matched case-insensitively by map_cols).
START_ALIASES = [
    "Seizure start",
    "Anfald start",
    "Start",
    "Start time",
    "Starttid",
    "Onset",
    "Seizure_start",
    "Start (UTC)",
]
# Accepted header spellings for the seizure end column.
END_ALIASES = [
    "Seizure end",
    "Anfald slut",
    "Slut",
    "End time",
    "Sluttid",
    "Offset",
    "Seizure_end",
    "End (UTC)",
]
# Accepted header spellings for a column identifying the recording.
REC_ALIASES = [
    "Recording",
    "Recording ID",
    "Recording_Id",
    "Rec",
    "Session",
]


def parse_annotations_excel_or_csv(path_x, recording_id=None):
    """
    Read seizure intervals from an annotation file.

    - .xlsx/.xls: every sheet is read; anything else is read as a single CSV.
    - Headers are converted to stripped strings before matching.
    - If recording_id is given and a sheet has a column named like one of
      REC_ALIASES (case-insensitive), only rows whose first run of digits in any
      such column equals str(recording_id) are kept.
    - Start/end columns are resolved with map_cols(START_ALIASES, END_ALIASES);
      sheets without both are ignored.
    - Cell values are converted with to_naive_local_dt; rows with a missing or
      unparsable value, or with end <= start, are dropped.

    Returns a de-duplicated list of (start_dt, end_dt) naive local datetimes,
    sorted by start time.
    """
    p = Path(path_x)
    if p.suffix.lower() in (".xlsx", ".xls"):
        # The context manager closes the workbook handle once all sheets are parsed.
        with pd.ExcelFile(path_x) as xls:
            dfs = [xls.parse(sheet) for sheet in xls.sheet_names]
    else:
        # Single CSV file -> one DataFrame
        dfs = [pd.read_csv(path_x)]

    events = []
    for df in dfs:
        if df.empty:
            continue

        # Normalise headers once: non-string headers become strings, whitespace is trimmed.
        df = df.rename(columns=lambda c: str(c).strip())

        # Optional: filter by recording id if a suitable column exists
        rec_aliases_l = {a.lower() for a in REC_ALIASES}
        rec_cols = [c for c in df.columns if c.lower() in rec_aliases_l]
        if rec_cols and recording_id is not None:
            wanted = str(recording_id)
            keep = pd.Series(False, index=df.index)
            for c in rec_cols:
                # First run of digits in the cell, compared as text
                digits = df[c].astype(str).str.extract(r"(\d+)")[0]
                keep |= (digits == wanted)
            df = df[keep]
            if df.empty:
                continue

        a_col, b_col = map_cols(df, START_ALIASES, END_ALIASES)
        if not (a_col and b_col):
            continue

        # Iterate over rows with non-null start and end values
        pairs = df[[a_col, b_col]].dropna().itertuples(index=False, name=None)
        for raw_start, raw_end in pairs:
            st = to_naive_local_dt(raw_start)
            et = to_naive_local_dt(raw_end)
            # Keep only valid intervals where end > start
            if st and et and et > st:
                events.append((st, et))

    # Deduplicate and sort by start time
    return sorted(set(events), key=lambda t: t[0])


def _read_lvm_header_datetime(path_lvm, header_lines=22):
    """Read 'Date' and 'Time' from LVM header and return naive local datetime or None.
    
    Danish: LÃ¦s 'Date' og 'Time' fra LVM-header og returnÃ©r naiv lokal datetime eller None.
    """
    date_str, time_str = None, None
    with open(path_lvm, "r", encoding="utf-8", errors="ignore") as f:
        for i, line in enumerate(f):
            if i >= header_lines:
                break
            line = line.strip()
            if line.lower().startswith("date"):
                # e.g.: Date    2016/10/12
                parts = line.split(None, 1)
                if len(parts) > 1:
                    date_str = parts[1].strip()
            elif line.lower().startswith("time"):
                # e.g.: Time    13:06:19,1816539465369109152  (or HH:MM:SS)
                parts = line.split(None, 1)
                if len(parts) > 1:
                    # Replace comma with dot to handle fractional seconds using either separator
                    time_str = parts[1].strip().replace(",", ".")
    if not date_str or not time_str:
        return None
    # Try to combine into a single timestamp
    ts = pd.to_datetime(f"{date_str} {time_str}", errors="coerce", dayfirst=False, utc=False)
    if pd.isna(ts):
        # Fallback: try parsing date and time separately
        d = pd.to_datetime(date_str, errors="coerce")
        t = pd.to_datetime(time_str, errors="coerce")
        if pd.isna(d) or pd.isna(t):
            return None
        # Combine date and time components into a single Timestamp
        ts = d.normalize() + pd.to_timedelta(t.hour, unit="h") + pd.to_timedelta(t.minute, unit="m") + pd.to_timedelta(t.second, unit="s") + pd.to_timedelta(t.microsecond, unit="us")
    py = ts.to_pydatetime()
    # If timezone-aware â†’ convert to local timezone and drop tz info; otherwise return as-is
    return py.astimezone().replace(tzinfo=None) if py.tzinfo else py

def parse_annotations_lvm(path_lvm):
    """Parse seizure start/end markers from an LVM annotation file.

    The first 22 lines are treated as header (also scanned for a base date/time),
    the remainder is read as a tab-separated table with ',' as decimal separator.
    Rows whose comment mentions "seizure" together with "start" are start markers;
    "seizure" with "end"/"stop" are end markers. Markers are paired by order of
    appearance (i-th start with i-th end).

    Each pair is resolved to datetimes in one of two ways:
      A) both time values are accepted by ``to_naive_local_dt`` (absolute time), or
      B) both are numeric seconds added to the header base time (relative time).

    Returns:
        Sorted, de-duplicated list of ``(start, end)`` tuples with ``end > start``;
        an empty list when no usable time/comment column exists.
    """
    base_dt = _read_lvm_header_datetime(path_lvm, header_lines=22)

    table = pd.read_csv(path_lvm, sep="\t", decimal=",", engine="python", skiprows=22, header=0)
    table.columns = [name.strip() for name in table.columns]

    def first_present(candidates):
        # First candidate column name that exists in the table, else None.
        return next((name for name in candidates if name in table.columns), None)

    time_col = first_present(["X_Value","X Value","Time","Timestamp","DateTime"])
    comment_col = first_present(["Comment","Comments","Kommentar"])
    if not (time_col and comment_col):
        return []

    # Collect start and end marker rows in file order.
    start_rows, end_rows = [], []
    for _, row in table.iterrows():
        note = str(row[comment_col]).lower()
        if "seizure" not in note:
            continue
        if "start" in note:
            start_rows.append(row)
        elif "end" in note or "stop" in note:
            end_rows.append(row)

    events = []
    # zip() stops at the shorter list, so unmatched trailing markers are ignored.
    for start_row, end_row in zip(start_rows, end_rows):
        raw_start = start_row[time_col]
        raw_end = end_row[time_col]

        # Case A: the column already holds absolute times.
        st_dt = to_naive_local_dt(raw_start)
        et_dt = to_naive_local_dt(raw_end)

        if not (st_dt and et_dt):
            # Case B: seconds relative to the header timestamp (needs base_dt).
            st_dt = et_dt = None
            if base_dt is not None:
                try:
                    st_dt = base_dt + dt.timedelta(seconds=float(str(raw_start).replace(",", ".")))
                    et_dt = base_dt + dt.timedelta(seconds=float(str(raw_end).replace(",", ".")))
                except Exception:
                    st_dt = et_dt = None

        if st_dt and et_dt and et_dt > st_dt:
            events.append((st_dt, et_dt))

    # De-duplicate, then order by start time.
    return sorted(set(events), key=lambda pair: pair[0])

def find_annotations_for_patient(patient_id):
    """List candidate annotation files for ``patient_id`` under BASE_ANNOTATION_DIR.

    The search is recursive and purely name-based: first files containing
    "patient" followed by the id, then any file containing the id. With
    ``patient_id=None`` the id part is empty, so the patterns match on extension
    (and "patient") alone.

    Returns:
        Unique paths ordered by file type: Excel (.xlsx/.xls) first, then .csv,
        then .lvm. Within one type the discovery order is kept.
    """
    pid = "" if patient_id is None else str(patient_id)

    # Same pattern order as discovery priority: "patient"-prefixed names first.
    globs = [f"**/*patient*{pid}*{ext}" for ext in (".xlsx", ".xls", ".csv", ".lvm")]
    globs += [f"**/*{pid}*{ext}" for ext in (".xlsx", ".csv", ".lvm")]

    found = []
    for pattern in globs:
        found.extend(glob.glob(os.path.join(BASE_ANNOTATION_DIR, pattern), recursive=True))

    priority = ((".xlsx", 0), (".xls", 0), (".csv", 1), (".lvm", 2))

    def rank(path):
        # Lower rank sorts first; unknown extensions go last.
        lowered = path.lower()
        return next((order for suffix, order in priority if lowered.endswith(suffix)), 9)

    unique_paths = list(dict.fromkeys(found))   # drop duplicates, keep first occurrence
    return sorted(unique_paths, key=rank)       # stable sort keeps discovery order per rank

def load_events_for_patient_with_excel(patient_id, base_annotation_dir):
    """Find and parse the Excel annotation file of a patient.

    Searches ``base_annotation_dir`` recursively for files named like
    "patient <id>", "patient_<id>", "patient<id>", "pt <id>" or "pt_<id>"
    (the "patient "/"patient_" forms are also tried with a zero-padded
    two-digit id) and keeps only .xls/.xlsx files.

    Candidates are tried in sorted path order; the first one that yields a
    non-empty event list wins. A file that fails to parse only produces a
    printed warning.

    Returns:
        ``(events, path)`` for the first usable file, or ``([], None)``.
    """
    pid_str = str(patient_id)
    pid_0 = pid_str.zfill(2)  # "6" -> "06"

    stems = [
        f"patient {pid_str}",
        f"patient_{pid_str}",
        f"patient{pid_str}",
        f"patient {pid_0}",
        f"patient_{pid_0}",
        f"pt {pid_str}",
        f"pt_{pid_str}",
    ]

    hits = []
    for stem in stems:
        hits.extend(glob.glob(os.path.join(base_annotation_dir, f"**/{stem}.*"), recursive=True))

    # Keep Excel files only; the set removes paths matched by several patterns.
    excel_paths = sorted({p for p in hits if p.lower().endswith((".xls", ".xlsx"))})

    for path_excel in excel_paths:
        try:
            df = load_seizure_annotations_file(path_excel)
            ev = events_from_annotation_df(df, prefer="clinic")
            if ev:
                return ev, path_excel
        except Exception as e:
            # Best effort: report the problem and move on to the next candidate.
            print(f"Warning: kunne ikke parse {path_excel}: {e}")

    return [], None

def load_events_for_patient_and_enrollment(patient_id, enrollment_name, base_annotation_dir):
    """
    Load seizure events for one enrollment of a patient.

    When ``enrollment_name`` contains "enrollment <letter>" (e.g. 'enrollment A'),
    Excel files carrying that letter as a suffix of the patient id are searched
    recursively in ``base_annotation_dir``:
      "patient 8a.xls[x]", "patient_8a.xls[x]", "patient 8a <anything>.xls[x]"

    The regular patient-level lookup (load_events_for_patient_with_excel) is used
    when no enrollment is given, when no letter can be read from the name, or when
    none of the suffixed files yields events.

    Returns the same ``(events, path)`` pair as the patient-level lookup.
    """
    # No enrollment at all -> plain patient file.
    if not enrollment_name:
        return load_events_for_patient_with_excel(patient_id, base_annotation_dir)

    # "enrollment A" -> "a"
    match = re.search(r"enrollment\s*([A-Za-z])", enrollment_name, re.IGNORECASE)
    if match is None:
        return load_events_for_patient_with_excel(patient_id, base_annotation_dir)

    tag = str(patient_id) + match.group(1).lower()   # e.g. "8a"
    stems = (
        f"patient {tag}",
        f"patient_{tag}",
        f"patient {tag} *",   # e.g. 'Patient 8b something.xls'
    )

    hits = []
    for stem in stems:
        for ext in (".xls", ".xlsx"):
            hits.extend(glob.glob(os.path.join(base_annotation_dir, f"**/{stem}{ext}"), recursive=True))

    # Try the unique candidates in sorted order; first file with events wins.
    for path_excel in sorted(set(hits)):
        try:
            ann_df = load_seizure_annotations_file(path_excel)
            ev = events_from_annotation_df(ann_df, prefer="clinic")
            if ev:
                return ev, path_excel
        except Exception as e:
            print(f"Warning: kunne ikke parse {path_excel}: {e}")

    # Nothing usable with the letter suffix -> regular patient file.
    return load_events_for_patient_with_excel(patient_id, base_annotation_dir)

def time_to_index(local_dt, tdms_start_local_naive, wf_increment, n_samples):
    """
    Map an absolute local time onto the nearest sample index of a recording.

    Both time arguments are passed through ``to_naive_local_dt`` first. The
    result is rounded to the nearest sample and then limited to the range
    0 .. n_samples-1, so times outside the recording map to its first or last
    sample.

    Raises:
        ValueError: if ``wf_increment`` (seconds per sample) is None or not
            positive, or if either time cannot be interpreted.
    """
    if wf_increment is None or wf_increment <= 0:
        raise ValueError("wf_increment must be > 0 (sec/sample).")

    target = to_naive_local_dt(local_dt)
    origin = to_naive_local_dt(tdms_start_local_naive)
    if target is None or origin is None:
        raise ValueError(f"Cannot interpret times: local_dt={local_dt!r}, file_start={tdms_start_local_naive!r}")

    elapsed_s = (target - origin).total_seconds()
    nearest = int(round(elapsed_s / wf_increment))
    # Clamp: lower bound first, then upper bound (same order as a clip operation).
    return int(min(max(nearest, 0), n_samples - 1))


def make_time_axis(ax):
    """Format the x-axis of ``ax`` as clock time (HH:MM:SS) with slanted tick labels."""
    x_axis = ax.xaxis
    x_axis.set_major_locator(mdates.AutoDateLocator())
    x_axis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    # Slight rotation + right alignment keeps neighbouring time labels from overlapping.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(15)
        tick_label.set_horizontalalignment('right')


def build_segment_datetimes(start_local_dt, start_index, end_index, wf_inc):
    """Return the timestamps of the first and last sample of a segment.

    ``end_index`` is exclusive, so the last sample is ``end_index - 1``;
    ``wf_inc`` is the sample spacing in seconds.
    """
    first_offset = dt.timedelta(seconds=start_index * wf_inc)
    last_offset = dt.timedelta(seconds=(end_index - 1) * wf_inc)
    return start_local_dt + first_offset, start_local_dt + last_offset


def plot_raw(
    out_png, y, fs, wf_inc, tdms_start_local, seg_i0, seg_i1,
    title_prefix="", x_axis_mode="absolute"
):
    """Plot the samples ``y`` of segment [seg_i0, seg_i1) and save the figure to ``out_png``.

    With ``x_axis_mode="absolute"`` the x-axis shows one datetime per sample;
    any other value plots against seconds since the start of the window.
    The title shows the window's first and last sample time.
    """
    n_samples = seg_i1 - seg_i0
    seconds_from_start = np.arange(n_samples) / fs
    seg_start, seg_end = build_segment_datetimes(tdms_start_local, seg_i0, seg_i1, wf_inc)

    fig = plt.figure(figsize=(14, 4))
    ax = fig.gca()

    if x_axis_mode != "absolute":
        ax.plot(seconds_from_start, y)
        ax.set_xlabel("Time (s) relative to window start")
    else:
        # One naive datetime per sample, spaced wf_inc seconds apart.
        sample_times = [seg_start + dt.timedelta(seconds=k * wf_inc) for k in range(n_samples)]
        ax.plot(sample_times, y)
        make_time_axis(ax)
        ax.set_xlabel("Time (local)")

    ax.set_ylabel("Amplitude")
    ax.set_title(f"{title_prefix}  |  Window: {_format_dt(seg_start)} â†’ {_format_dt(seg_end)}")
    fig.tight_layout()
    fig.savefig(out_png, dpi=150)
    plt.close(fig)   # release the figure; many plots are produced in a loop


def sanity_check_and_log(
    out_dir: str,
    base_name: str,
    tdms_start_naive,   # datetime.datetime (naive local)
    wf_inc: float,      # sec/sample
    st_idx: int,        # event start sample
    et_idx: int,        # event end sample
    seg_i0: int,        # segment start sample
    seg_i1: int,        # segment end sample (exclusive)
    tol_seconds: float | None = None  # default: 1.5 * wf_inc
):
    """
    Checks that event markers (st_idx, et_idx) land correctly within the segment [seg_i0, seg_i1),
    and that they correspond to the expected relative positions. Saves a JSON report.

    Returns (ok: bool, report: dict).
    """
    if tol_seconds is None:
        tol_seconds = 1.5 * wf_inc  # ~1â€“2 samples tolerance

    # Absolute times
    seg_start = tdms_start_naive + dt.timedelta(seconds=seg_i0 * wf_inc)
    seg_end   = tdms_start_naive + dt.timedelta(seconds=(seg_i1 - 1) * wf_inc)  # last sample in the window
    event_start = tdms_start_naive + dt.timedelta(seconds=st_idx * wf_inc)
    event_end   = tdms_start_naive + dt.timedelta(seconds=et_idx * wf_inc)

    # Relative positions in seconds
    start_rel_s = (st_idx - seg_i0) * wf_inc
    end_rel_s   = (et_idx - seg_i0) * wf_inc

    # Expected absolute times from segment start + relative offsets
    exp_start = seg_start + dt.timedelta(seconds=start_rel_s)
    exp_end   = seg_start + dt.timedelta(seconds=end_rel_s)

    # Errors in seconds
    err_start_s = (event_start - exp_start).total_seconds()
    err_end_s   = (event_end   - exp_end).total_seconds()

    # Errors in samples
    err_start_samples = err_start_s / wf_inc
    err_end_samples   = err_end_s   / wf_inc

    # Additional checks
    within_segment = (st_idx >= seg_i0) and (et_idx <= seg_i1)
    start_on_edge  = (st_idx == seg_i0) or (st_idx == seg_i1)
    end_on_edge    = (et_idx == seg_i0) or (et_idx == seg_i1)

    # OK criterion
    ok = (abs(err_start_s) <= tol_seconds) and (abs(err_end_s) <= tol_seconds) and within_segment

    report = {
        "segment_start": seg_start.strftime("%Y-%m-%d %H:%M:%S"),
        "segment_end":   seg_end.strftime("%Y-%m-%d %H:%M:%S"),
        "event_start":   event_start.strftime("%Y-%m-%d %H:%M:%S"),
        "event_end":     event_end.strftime("%Y-%m-%d %H:%M:%S"),
        "wf_increment_s": wf_inc,
        "tol_seconds": tol_seconds,
        "start_rel_s": start_rel_s,
        "end_rel_s":   end_rel_s,
        "error_start_seconds": err_start_s,
        "error_end_seconds":   err_end_s,
        "error_start_samples": err_start_samples,
        "error_end_samples":   err_end_samples,
        "st_idx": st_idx,
        "et_idx": et_idx,
        "seg_i0": seg_i0,
        "seg_i1": seg_i1,
        "within_segment": within_segment,
        "start_on_edge": start_on_edge,
        "end_on_edge": end_on_edge,
        "ok": ok,
    }

    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, f"{base_name}_sanity.json"), "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, ensure_ascii=False)

    return ok, report

def _seg_times(tdms_start_naive, i0, i1, wf_inc):
    """Return (seg_start, seg_end) as naive local datetimes."""
    seg_start = tdms_start_naive + dt.timedelta(seconds=i0 * wf_inc)
    seg_end = tdms_start_naive + dt.timedelta(seconds=(i1 - 1) * wf_inc)
    return seg_start, seg_end


def _make_time_axis(ax):
    """Show x-axis ticks as HH:MM:SS, rotated 15 degrees and right-aligned."""
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    for tick_label in ax.get_xticklabels():
        tick_label.set(rotation=15, horizontalalignment='right')


# NOTE: thin_for_plot() below takes its default max_points from MAX_PLOT_POINTS, which must already be defined in the notebook globals.
def build_time_index(seg_start_dt, n, wf_inc):
    """Return a DatetimeIndex of ``n`` timestamps, the first at ``seg_start_dt``, ``wf_inc`` seconds apart."""
    sample_spacing = pd.to_timedelta(wf_inc, unit="s")
    return pd.date_range(start=seg_start_dt, periods=n, freq=sample_spacing)


def thin_for_plot(y, max_points=MAX_PLOT_POINTS):
    """Down-sample ``y`` by regular striding so that at most ``max_points`` values remain.

    Returns:
        ``(indices, values)``: the kept positions as an integer array and the
        matching values. When ``y`` is already short enough, all positions and
        ``y`` itself are returned.
    """
    total = len(y)
    if total > max_points:
        stride = int(np.ceil(total / max_points))   # smallest step that fits the budget
        keep = np.arange(0, total, stride, dtype=int)
        return keep, y[keep]
    return np.arange(total), y


def plot_seizure_abs(
    out_png, y, fs, wf_inc, tdms_start_naive,
    seg_i0, seg_i1, st_idx, et_idx, title_prefix
):
    """Plot segment [seg_i0, seg_i1) against absolute time with seizure start/end markers.

    ``y`` holds the samples of the segment; it is thinned with ``thin_for_plot``
    before plotting. Dashed vertical lines mark the times of ``st_idx`` and
    ``et_idx``. The figure is saved to ``out_png`` and closed. Nothing is drawn
    for an empty segment. ``fs`` is accepted but not used here.
    """
    samples = np.asarray(y)
    n_samples = seg_i1 - seg_i0
    if n_samples <= 0:
        return

    def at_sample(sample_idx):
        # Absolute time of a sample index, counted from the recording start.
        return tdms_start_naive + dt.timedelta(seconds=sample_idx * wf_inc)

    seg_start, seg_end = at_sample(seg_i0), at_sample(seg_i1 - 1)
    event_start, event_end = at_sample(st_idx), at_sample(et_idx)

    fig = plt.figure(figsize=(14, 4))
    ax = fig.gca()

    # Full-resolution time axis, reduced with the same selection as the samples.
    all_times = pd.date_range(
        start=seg_start,
        periods=n_samples,
        freq=pd.to_timedelta(wf_inc, unit="s"),
    )
    keep, samples_thin = thin_for_plot(samples, max_points=MAX_PLOT_POINTS)

    ax.plot(all_times[keep], samples_thin, label="ECG")
    ax.axvline(event_start, linestyle="--", label="seizure start")
    ax.axvline(event_end, linestyle="--", label="seizure end")

    _make_time_axis(ax)
    ax.set_xlabel("Time (HH:MM:SS)")
    ax.set_ylabel("Amplitude")

    stamp = "%Y-%m-%d %H:%M:%S"
    window_txt = f"{seg_start:{stamp}} â†’ {seg_end:{stamp}}"
    event_txt = f"{event_start:{stamp}} â†’ {event_end:{stamp}}"
    ax.set_title(f"{title_prefix}  |  Window: {window_txt}  (Event: {event_txt})")

    ax.legend()
    fig.tight_layout()
    fig.savefig(out_png, dpi=150)
    plt.close(fig)


def plot_raw_abs(
    out_png, y, fs, wf_inc, tdms_start_naive,
    seg_i0, seg_i1, title_prefix
):
    """Plot segment [seg_i0, seg_i1) against absolute time, without event markers.

    ``y`` holds the samples of the segment and is thinned with ``thin_for_plot``
    before plotting. The figure is saved to ``out_png`` and closed. Nothing is
    drawn for an empty segment. ``fs`` is accepted but not used here.
    """
    n_samples = seg_i1 - seg_i0
    if n_samples <= 0:
        return

    seg_start, seg_end = _seg_times(tdms_start_naive, seg_i0, seg_i1, wf_inc)

    fig = plt.figure(figsize=(14, 4))
    ax = fig.gca()

    sample_spacing = pd.to_timedelta(wf_inc, unit="s")
    all_times = pd.date_range(start=seg_start, periods=n_samples, freq=sample_spacing)
    keep, samples_thin = thin_for_plot(np.asarray(y), max_points=MAX_PLOT_POINTS)

    ax.plot(all_times[keep], samples_thin)
    _make_time_axis(ax)
    ax.set_xlabel("Time (HH:MM:SS)")
    ax.set_ylabel("Amplitude")

    stamp = "%Y-%m-%d %H:%M:%S"
    ax.set_title(f"{title_prefix}  |  Window: {seg_start:{stamp}} â†’ {seg_end:{stamp}}")

    fig.tight_layout()
    fig.savefig(out_png, dpi=150)
    plt.close(fig)


def discover_all_tdms(base_dir: str):
    """Yield (relative_path, absolute_path) for every .tdms file below the sub-folders of ``base_dir``.

    Only first-level *directories* of ``base_dir`` are searched (recursively);
    files lying directly in ``base_dir`` are ignored. The suffix test is
    case-insensitive and requires the name to end in ".tdms", which already
    excludes ".tdms_index" companions.

    Results come in a deterministic order: top-level folders, sub-folders and
    file names are each visited in case-insensitive alphabetical order, so
    repeated runs enumerate recordings identically regardless of filesystem.

    Args:
        base_dir: Root folder containing one sub-folder per patient.

    Yields:
        Tuples ``(path relative to base_dir, joined path under base_dir)``.
    """
    for entry in sorted(os.listdir(base_dir), key=str.lower):
        patient_dir = os.path.join(base_dir, entry)
        if not os.path.isdir(patient_dir):
            continue
        for root, dirs, files in os.walk(patient_dir):
            # Sorting in place makes os.walk descend into sub-folders in a stable order.
            dirs.sort(key=str.lower)
            for fname in sorted(files, key=str.lower):
                if not fname.lower().endswith(".tdms"):
                    continue
                abs_path = os.path.join(root, fname)
                yield os.path.relpath(abs_path, base_dir), abs_path

def build_time_index(seg_start_dt, n, wf_inc):
    """Time axis for a segment: ``n`` evenly spaced timestamps starting at ``seg_start_dt``.

    ``wf_inc`` is the spacing in seconds. The result is a pandas DatetimeIndex.
    """
    step = pd.to_timedelta(wf_inc, unit="s")
    time_axis = pd.date_range(start=seg_start_dt, periods=n, freq=step)
    return time_axis

def thin_for_plot(y, max_points=200_000):
    """Reduce ``y`` to at most ``max_points`` values by taking every k-th sample.

    Returns:
        ``(selector, values)``. ``selector`` is ``slice(None)`` when nothing has
        to be dropped (``values`` is then ``y`` itself), otherwise an integer
        index array; in both cases it can be used to index a matching time axis.
    """
    total = len(y)
    if total > max_points:
        stride = int(np.ceil(total / max_points))   # smallest step that fits the budget
        keep = np.arange(0, total, stride, dtype=int)
        return keep, y[keep]
    return slice(None), y

def print_mem(tag=""):
    """Print the resident memory (RSS) of the current Python process in GB, prefixed by ``tag``."""
    rss_bytes = psutil.Process().memory_info().rss
    rss_gb = rss_bytes / (1024 ** 3)
    print(f"[DEBUG] {tag} RAM-forbrug: {rss_gb:.2f} GB")

def split_rel_tdms_path(rel_tdms: str):
    """
    Split the folder part of a relative TDMS path into (patient, enrollment, recording).

    The path is split on ``os.sep`` after dropping the file name. Examples
    (written with "/" for readability):
      "Patient 8/recording 1/Patient 8_1.tdms"
         -> patient="Patient 8", enrollment=None, recording="recording 1"

      "Patient 8/enrollment A/recording 2/Patient 8_2.tdms"
         -> patient="Patient 8", enrollment="enrollment A", recording="recording 2"

    Rules:
      * one folder            -> patient only
      * two folders           -> second is the enrollment if its name starts with
                                 "enrollment" (case-insensitive), else the recording
      * three or more folders -> second is taken as enrollment, third as recording
                                 (whatever their names); deeper levels are ignored
    """
    folders = os.path.dirname(rel_tdms).split(os.sep)   # file name removed
    patient = folders[0]                                # split() always yields >= 1 item

    if len(folders) == 1:
        return patient, None, None

    second = folders[1]
    if len(folders) == 2:
        if second.lower().startswith("enrollment"):
            return patient, second, None
        return patient, None, second

    # patient / <enrollment or other folder> / recording / ...
    return patient, second, folders[2]

def process_tdms_recording(rel_tdms, abs_tdms, patient_id, enrollment_name, events_all):
    """
    Process a single TDMS recording for a patient/enrollment.

    Loads the ECG via ``tdms_load_ecg``, keeps the annotated events selected by
    ``filter_events_for_tdms_span`` (12 h padding) and, for every kept event,
    writes into OUTPUT_ROOT/<patient>/[<enrollment>/][<recording>/]:
      * ``eventNN_sanity.json``                 - index consistency report
      * ``eventNN_seizure_ctx.png/.csv``        - seizure with PAD_MIN minutes of context
      * ``eventNN_seizure_only.png/.csv``       - the seizure interval itself
      * ``eventNN_nonseizure.png/.csv``         - control window starting
        NONSEIZURE_OFFSET_MIN + PAD_MIN minutes before seizure onset, with the
        same length as the context window
    plus one ``meta.json`` per recording.

    Args:
        rel_tdms: Path of the TDMS file relative to the patients root (used for
            folder names and log output).
        abs_tdms: Path passed to ``tdms_load_ecg``.
        patient_id: Only used in log output.
        enrollment_name: Optional enrollment folder name for the output path.
        events_all: Iterable of ``(start, end)`` event times for the patient.

    Returns:
        None. Load failures and recordings without overlapping events are
        reported via print and skipped.
    """
    patient_folder, _, recording_folder = split_rel_tdms_path(rel_tdms)

    print(f"\n== TDMS == {rel_tdms}  (patient={patient_id})")
    try:
        x, fs, start_local, meta = tdms_load_ecg(abs_tdms)
    except Exception as e:
        print(f"[FEJL] TDMS indlÃ¦sning: {e}")
        return

    if start_local is None:
        print("[FEJL] TDMS starttid kunne ikke fortolkes â€“ springer denne recording.")
        return

    wf_inc = 1.0 / fs
    events_in_span = filter_events_for_tdms_span(events_all, start_local, len(x), wf_inc, pad_hours=12, debug_label=rel_tdms)
    if not events_in_span:
        print(f"[INFO] {len(events_all)} annotering(er) fundet, men ingen overlapper denne recording.")
        return

    print(f"[OK] {len(events_in_span)}/{len(events_all)} annotering(er) overlapper {recording_folder or 'optagelsen'}.")

    # build output dirs
    patient_out_dir = os.path.join(OUTPUT_ROOT, patient_folder)
    ensure_dir(patient_out_dir)

    if enrollment_name:
        enrollment_out_dir = os.path.join(patient_out_dir, enrollment_name)
        ensure_dir(enrollment_out_dir)
        base_out_dir = enrollment_out_dir
    else:
        base_out_dir = patient_out_dir

    if recording_folder:
        out_dir = os.path.join(base_out_dir, recording_folder)
    else:
        out_dir = base_out_dir
    ensure_dir(out_dir)

    with open(os.path.join(out_dir, "meta.json"), "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2, ensure_ascii=False)

    # Loop over the events kept for this recording.
    for k, (st_local, et_local) in enumerate(events_in_span, start=1):
        base = f"event{k:02d}"

        # ---- indices for the event itself ----
        # NOTE(review): time_to_index clamps to the recording, so an event that lies
        # in the 12 h padding outside the file collapses onto the first/last sample.
        st_idx = time_to_index(st_local, start_local, wf_inc, len(x))
        et_idx = time_to_index(et_local, start_local, wf_inc, len(x))
        if et_idx < st_idx:
            st_idx, et_idx = et_idx, st_idx

        # =======================================================
        # A) CONTEXT window (+/- PAD_MIN minutes around the event)
        # =======================================================
        sz_i0, sz_i1 = slice_window(len(x), fs, st_idx, et_idx, PAD_MIN)

        if sz_i1 <= sz_i0:
            print(f"[INFO] Tomt seizure-vindue for {base} â€“ springer.")
            continue

        y_ctx = x[sz_i0:sz_i1]

        ok, rep = sanity_check_and_log(
            out_dir,
            base,
            start_local,
            wf_inc,
            st_idx,
            et_idx,
            sz_i0,
            sz_i1,
        )
        print(
            f"[SANITY] {base}: ok={ok} within={rep['within_segment']} "
            f"err_start={rep['error_start_samples']:.2f} samp, "
            f"err_end={rep['error_end_samples']:.2f} samp"
        )

        # plot + csv context
        plot_seizure_abs(
            os.path.join(out_dir, f"{base}_seizure_ctx.png"),
            y=y_ctx,
            fs=fs,
            wf_inc=wf_inc,
            tdms_start_naive=start_local,
            seg_i0=sz_i0,
            seg_i1=sz_i1,
            st_idx=st_idx,
            et_idx=et_idx,
            title_prefix=f"Seizure kontekst (Â±{PAD_MIN} min)",
        )
        t_rel_ctx = (np.arange(sz_i0, sz_i1) - sz_i0) / fs
        save_csv(os.path.join(out_dir, f"{base}_seizure_ctx.csv"), t_rel_ctx, y_ctx)

        # =======================================================
        # B) SEIZURE-ONLY (the seizure itself)
        # =======================================================
        so_i0 = max(0, st_idx)
        so_i1 = min(len(x), et_idx)
        if so_i1 > so_i0:
            y_so = x[so_i0:so_i1]
            plot_seizure_abs(
                os.path.join(out_dir, f"{base}_seizure_only.png"),
                y=y_so,
                fs=fs,
                wf_inc=wf_inc,
                tdms_start_naive=start_local,
                seg_i0=so_i0,
                seg_i1=so_i1,
                st_idx=st_idx,
                et_idx=et_idx,
                title_prefix="Seizure (kun anfald)",
            )
            t_rel_so = (np.arange(so_i0, so_i1) - so_i0) / fs
            save_csv(os.path.join(out_dir, f"{base}_seizure_only.csv"), t_rel_so, y_so)

        # =======================================================
        # C) NON-SEIZURE (same length as the context window, starting
        #    NONSEIZURE_OFFSET_MIN + PAD_MIN minutes before seizure onset)
        # =======================================================
        ns_anchor_time = st_local - dt.timedelta(minutes=(NONSEIZURE_OFFSET_MIN + PAD_MIN))
        # Compute the start index WITHOUT clamping. time_to_index() would pull an
        # anchor that lies before the recording onto sample 0, so the bounds check
        # below could never fail and the "non-seizure" window would be taken from
        # the wrong place (possibly overlapping the seizure itself).
        ns_offset_s = (ns_anchor_time - start_local).total_seconds()
        ns_i0 = int(round(ns_offset_s / wf_inc))
        ns_i1 = ns_i0 + (sz_i1 - sz_i0)
        # Trim a partially available window to the recording; a window that lies
        # completely outside ends up empty and is skipped.
        ns_i0 = max(0, ns_i0)
        ns_i1 = min(len(x), ns_i1)

        if ns_i1 > ns_i0:
            y_ns = x[ns_i0:ns_i1]
            plot_raw_abs(
                os.path.join(out_dir, f"{base}_nonseizure.png"),
                y=y_ns,
                fs=fs,
                wf_inc=wf_inc,
                tdms_start_naive=start_local,
                seg_i0=ns_i0,
                seg_i1=ns_i1,
                title_prefix=f"Non-seizure ({NONSEIZURE_OFFSET_MIN} min fÃ¸r; lÃ¦ngde {(ns_i1-ns_i0)/fs:.1f}s)",
            )
            t_rel_ns = (np.arange(ns_i0, ns_i1) - ns_i0) / fs
            save_csv(os.path.join(out_dir, f"{base}_nonseizure.csv"), t_rel_ns, y_ns)
        else:
            print(f"[INFO] Non-seizure udenfor filgrÃ¦nser for {base} â€“ springer.")

        plt.close("all")

        # Release the per-event slices before the next iteration. Rebinding to None
        # is safe even when a branch above did not create the variable (the previous
        # `del y_sz, ...` named a variable that never existed and so never ran).
        y_ctx = y_so = y_ns = None
        gc.collect()
        print_mem(f"Efter event {k}")
    del x
    gc.collect()

def debug_list_annotation_candidates(patient_id, base_annotation_dir):
    """Print the files under ``base_annotation_dir`` whose names suggest they belong to the patient.

    Diagnostic helper only: the search is recursive and deliberately loose (any
    .xls/.xlsx file containing the id is listed), and nothing is returned.
    """
    pid_str = str(patient_id)
    name_patterns = (
        f"patient {pid_str}.*",
        f"patient_{pid_str}.*",
        f"patient{pid_str}.*",
        f"pt {pid_str}.*",
        f"*{pid_str}*.xls",
        f"*{pid_str}*.xlsx",
    )

    matches = set()
    for name_pattern in name_patterns:
        matches.update(
            glob.glob(os.path.join(base_annotation_dir, f"**/{name_pattern}"), recursive=True)
        )

    if matches:
        print(f"[DEBUG] Potential annotation files for patient {patient_id}:")
        for fpath in sorted(matches):
            print("   ", fpath)
    else:
        print(f"[DEBUG] No potential annotation files found for patient {patient_id}")


# --------------------------
# Run for one patient
# --------------------------
def filter_events_for_tdms_span(events, tdms_start_naive, n_samples, wf_inc, pad_hours=12, debug_label=None):
    """Keep the events that overlap a recording's time span, widened by ``pad_hours`` on both sides.

    The recording runs from ``tdms_start_naive`` for ``n_samples * wf_inc``
    seconds. An event ``(st, et)`` is kept when ``st <= hi`` and ``et >= lo``,
    where ``lo``/``hi`` are the padded span limits (boundaries inclusive).

    Args:
        events: Sequence of ``(start, end)`` times comparable with ``tdms_start_naive``.
        debug_label: When truthy, the span, all input events and the number of
            kept events are printed under this label.

    Returns:
        List of ``(start, end)`` tuples in input order.
    """
    margin = dt.timedelta(hours=pad_hours)
    tdms_end = tdms_start_naive + dt.timedelta(seconds=n_samples * wf_inc)
    lo = tdms_start_naive - margin
    hi = tdms_end + margin

    # Standard interval-overlap test against [lo, hi].
    kept = [(st, et) for (st, et) in events if st <= hi and et >= lo]

    if debug_label:
        print(f"[DEBUG] TDMS span {debug_label}: {tdms_start_naive} â†’ {tdms_end} (lo={lo}, hi={hi})")
        print(f"[DEBUG] Events (raw):")
        for i, (st, et) in enumerate(events, start=1):
            print(f"    evt{i}: {st} â†’ {et}")

        print(f"[DEBUG] -> {len(kept)}/{len(events)} events overlap the TDMS")

    return kept


def run_for_patient(patient_selector=None, patient_id=None):
    """
    Process ALL TDMS files for one patient, one enrollment at a time.

    Steps:
    - discover the patient's TDMS files under BASE_PATIENTS_DIR
    - group them by enrollment (second element of split_rel_tdms_path)
    - for every enrollment, load its annotations from BASE_ANNOTATION_DIR and
      hand each TDMS file plus those annotations to process_tdms_recording
      (extraction and saving happen there, not in this function)

    Files without an enrollment are only processed when NO file of the patient
    has an enrollment; in that case everything runs once with enrollment None.

    Parameters
    ----------
    patient_selector : str, optional
        Patient folder label (e.g. "Patient 38"), forwarded to
        discover_tdms_for_patient and used in log messages.
    patient_id : optional
        Patient id, forwarded to discover_tdms_for_patient and used as a
        fallback when no id can be extracted from the first TDMS path.

    Returns
    -------
    None
    """
    # 1) find all TDMS for the selected patient
    tdms_list = list(
        discover_tdms_for_patient(
            BASE_PATIENTS_DIR,
            patient_selector=patient_selector,
            patient_id=patient_id,
        )
    )
    if not tdms_list:
        print("[INFO] No TDMS files found for the selected patient.")
        return

    # 2) try to infer patient-id from the first path; the recording id of that
    #    first file says nothing about the whole patient, so it is not used.
    p_id, _first_rec_id = extract_ids_from_relpath(tdms_list[0][0])
    if p_id is None and patient_id is not None:
        p_id = patient_id

    # One label for every log line, so a call by patient_id never prints "None".
    patient_label = patient_selector or f"Patient {p_id}"

    # 3) group the files by enrollment, splitting each relative path only once
    tdms_by_enrollment = {}
    for rel_tdms, abs_tdms in tdms_list:
        enrollment_of_file = split_rel_tdms_path(rel_tdms)[1]
        tdms_by_enrollment.setdefault(enrollment_of_file, []).append((rel_tdms, abs_tdms))

    # Named enrollments in sorted order; if there are none, run once with None
    enrollments = sorted(name for name in tdms_by_enrollment if name is not None)
    if not enrollments:
        enrollments = [None]

    # Run ALL enrollments for this patient
    for enrollment_name in enrollments:
        enrollment_label = enrollment_name or "(none)"
        print(f"\n[INFO] Processing {patient_label} | Enrollment: {enrollment_label}")

        # TDMS files that belong to this enrollment (discovery order preserved)
        tdms_for_enrollment = tdms_by_enrollment.get(enrollment_name, [])

        # Load annotations for this enrollment (a, b, c etc.)
        events_all, ann_src = load_events_for_patient_and_enrollment(
            p_id, enrollment_name, BASE_ANNOTATION_DIR
        )

        debug_list_annotation_candidates(p_id, BASE_ANNOTATION_DIR)

        if not events_all:
            # "\u2014" is an em dash, written as an escape to keep the source ASCII.
            print(f"[INFO] No annotations found for {patient_label} / {enrollment_label} \u2014 skipping.")
            continue

        print(f"[OK] Found {len(events_all)} annotation(s) in: {ann_src}")

        print_mem("Before patient loop")
        # Run the TDMS files matching this enrollment
        for rel_tdms, abs_tdms in tdms_for_enrollment:
            process_tdms_recording(
                rel_tdms=rel_tdms,
                abs_tdms=abs_tdms,
                patient_id=p_id,
                enrollment_name=enrollment_name,
                events_all=events_all
            )

    # Reported once per patient, after all enrollments and recordings are done.
    print_mem(f"After cleanup {patient_label}")


In [22]:
# Single-patient run: processes only "Patient 38", for testing the pipeline
# before starting a batch over a whole cohort.
# The __main__ guard passes when this cell is executed in the notebook kernel
# (the recorded output below this cell shows the run).
if __name__ == "__main__":
    run_for_patient(patient_selector="Patient 38")


[INFO] Processing Patient 38 | Enrollment: enrollment a
[DEBUG] Potential annotation files for patient 38:
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 38a.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 38b.xls
[INFO] No annotations found for Patient 38 / enrollment a â€” skipping.

[INFO] Processing Patient 38 | Enrollment: enrollment b
[DEBUG] Potential annotation files for patient 38:
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 38a.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 38b.xls
[OK] Found 2 annotation(s) in: E

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 38\enrollment b\recording 1\Patient 38b_1.tdms: 2018-06-18 14:30:46 â†’ 2018-06-19 11:18:36.500000 (lo=2018-06-18 02:30:46, hi=2018-06-19 23:18:36.500000)
[DEBUG] Events (raw):
    evt1: 2018-06-18 17:11:26 â†’ 2018-06-18 17:11:49
    evt2: 2018-06-19 11:12:10 â†’ 2018-06-19 11:12:37
[DEBUG] -> 2/2 events overlap the TDMS
[OK] 2/2 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 0.48 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 0.53 GB
[DEBUG] After cleanup recording 1 RAM-forbrug: 0.24 GB


In [10]:
# Cohort split into responders and non-responders, following Jeppesen et al.
# The numbers are patient ids.
patients_responders_numbers = [3,5,6,8,10,14,15,16,21,23,27,28,29,31,34,37,39,40,41,42]
patients_non_responders_numbers = [1,2,4,7,9,11,12,13,17,18,19,20,22,24,25,26,30,32,33,35,36,38,43]

# Turn every id into its "Patient <n>" label, the form passed to run_for_patient.
patients_responders = list(map("Patient {}".format, patients_responders_numbers))
patients_non_responders = list(map("Patient {}".format, patients_non_responders_numbers))

print("Responders:", patients_responders)
print("Non-Responders:", patients_non_responders)

Responders: ['Patient 3', 'Patient 5', 'Patient 6', 'Patient 8', 'Patient 10', 'Patient 14', 'Patient 15', 'Patient 16', 'Patient 21', 'Patient 23', 'Patient 27', 'Patient 28', 'Patient 29', 'Patient 31', 'Patient 34', 'Patient 37', 'Patient 39', 'Patient 40', 'Patient 41', 'Patient 42']
Non-Responders: ['Patient 1', 'Patient 2', 'Patient 4', 'Patient 7', 'Patient 9', 'Patient 11', 'Patient 12', 'Patient 13', 'Patient 17', 'Patient 18', 'Patient 19', 'Patient 20', 'Patient 22', 'Patient 24', 'Patient 25', 'Patient 26', 'Patient 30', 'Patient 32', 'Patient 33', 'Patient 35', 'Patient 36', 'Patient 38', 'Patient 43']


In [11]:
# Batch run over one cohort. The responder loop is kept commented out below;
# swap which loop is active to process the other cohort.
# NOTE(review): OUTPUT_ROOT in the configuration cell has separate
# "Responders" / "NonResponders" variants - confirm it matches the active loop.
# for i in patients_responders:
#     run_for_patient(i)

# print(len(patients_responders))

# Each label is passed positionally, i.e. as run_for_patient's patient_selector.
for i in patients_non_responders:
    run_for_patient(i)


[INFO] Processing Patient 1 | Enrollment: (none)
[DEBUG] Potential annotation files for patient 1:
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 1.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 10.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 11.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 12.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 13.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch d

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 1\recording 1\Patient 1_1.tdms: 2016-02-22 11:04:14 â†’ 2016-02-24 16:09:49.750000 (lo=2016-02-21 23:04:14, hi=2016-02-25 04:09:49.750000)
[DEBUG] Events (raw):
    evt1: 2016-02-23 23:14:01 â†’ 2016-02-23 23:14:42
    evt2: 2016-02-25 05:20:48 â†’ 2016-02-25 05:22:20
    evt3: 2016-02-26 05:30:53 â†’ 2016-02-26 05:31:34
[DEBUG] -> 1/3 events overlap the TDMS
[OK] 1/3 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp




[DEBUG] Efter event 1 RAM-forbrug: 0.97 GB

== TDMS == Patient 1\recording 2\Patient 1_2.tdms  (patient=1)
[FEJL] TDMS indlÃ¦sning: No channels found in the TDMS file.
[DEBUG] After cleanup recording 1 RAM-forbrug: 0.24 GB

[INFO] Processing Patient 2 | Enrollment: (none)


  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] Potential annotation files for patient 2:
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 12.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 2.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 20.xlsx
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 21.xlsx
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 22.xls
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch p

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 7\recording 1\Patient 7_1.tdms: 2017-02-06 12:26:21 â†’ 2017-02-08 14:00:58.500000 (lo=2017-02-06 00:26:21, hi=2017-02-09 02:00:58.500000)
[DEBUG] Events (raw):
    evt1: 2017-02-07 09:42:22 â†’ 2017-02-07 09:43:11
    evt2: 2017-02-07 11:20:41 â†’ 2017-02-07 11:21:34
    evt3: 2017-02-07 12:50:27 â†’ 2017-02-07 12:51:15
    evt4: 2017-02-09 15:02:12 â†’ 2017-02-09 15:03:11
[DEBUG] -> 3/4 events overlap the TDMS
[OK] 3/4 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 1.36 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 1.41 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 1.45 GB

== TDMS == Patient 7\recording 2\Patient 7_2.tdms  (patient=7)
[DEBUG] TDMS span Patient 7\recording 2\Patient 7_2.tdms: 2017-02-08 14:02:18 â†’ 2017

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 9\recording 1\Patient 9_1.tdms: 2017-06-13 11:03:27 â†’ 2017-06-15 14:17:29 (lo=2017-06-12 23:03:27, hi=2017-06-16 02:17:29)
[DEBUG] Events (raw):
    evt1: 2017-06-13 17:42:04 â†’ 2017-06-13 17:46:01
    evt2: 2017-06-13 18:01:33 â†’ 2017-06-13 18:04:53
    evt3: 2017-06-13 22:33:49 â†’ 2017-06-13 22:38:41
[DEBUG] -> 3/3 events overlap the TDMS
[OK] 3/3 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 1.57 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 1.61 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 1.66 GB

== TDMS == Patient 9\recording 2\Patient 9_2.tdms  (patient=9)
[DEBUG] TDMS span Patient 9\recording 2\Patient 9_2.tdms: 2017-06-15 14:19:43 â†’ 2017-06-16 07:20:24.750000 (lo=2017-06-15 02:19:43, hi=2017-06-16 19:20:

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 11\recording 1\Patient 11_1.tdms: 2017-08-14 14:11:21 â†’ 2017-08-17 13:56:05.500000 (lo=2017-08-14 02:11:21, hi=2017-08-18 01:56:05.500000)
[DEBUG] Events (raw):
    evt1: 2017-08-17 11:46:25 â†’ 2017-08-17 11:47:29
    evt2: 2017-08-17 13:42:18 â†’ 2017-08-17 13:42:39
    evt3: 2017-08-17 17:58:24 â†’ 2017-08-17 17:59:03
    evt4: 2017-08-17 18:33:42 â†’ 2017-08-17 18:34:17
    evt5: 2017-08-17 19:19:31 â†’ 2017-08-17 19:20:06
    evt6: 2017-08-17 20:48:17 â†’ 2017-08-17 20:48:40
[DEBUG] -> 6/6 events overlap the TDMS
[OK] 6/6 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 1.97 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 2.01 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 2.02 GB
[SANITY] event04: ok=True within=True er

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 12\recording 1\Patient 12_1.tdms: 2017-10-02 13:36:06 â†’ 2017-10-04 13:34:50.500000 (lo=2017-10-02 01:36:06, hi=2017-10-05 01:34:50.500000)
[DEBUG] Events (raw):
    evt1: 2007-10-04 11:47:56 â†’ 2007-10-04 11:50:16
    evt2: 2007-10-04 14:57:59 â†’ 2007-10-04 15:00:08
[DEBUG] -> 0/2 events overlap the TDMS
[INFO] 2 annotering(er) fundet, men ingen overlapper denne recording.

== TDMS == Patient 12\recording 2\Patient 12_2.tdms  (patient=12)
[DEBUG] TDMS span Patient 12\recording 2\Patient 12_2.tdms: 2017-10-04 13:35:50 â†’ 2017-10-06 07:59:30.001953 (lo=2017-10-04 01:35:50, hi=2017-10-06 19:59:30.001953)
[DEBUG] Events (raw):
    evt1: 2007-10-04 11:47:56 â†’ 2007-10-04 11:50:16
    evt2: 2007-10-04 14:57:59 â†’ 2007-10-04 15:00:08
[DEBUG] -> 0/2 events overlap the TDMS
[INFO] 2 annotering(er) fundet, men ingen overlapper denne recording.
[DEBUG] After cleanup recording 1 RAM-forbrug: 1.24 GB

[INFO] Processing Patient 13 | Enrollment: (none)
[DEBUG] Potenti

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 13\recording 1\Patient 13_1.tdms: 2017-10-30 14:01:40 â†’ 2017-11-01 15:42:18.250000 (lo=2017-10-30 02:01:40, hi=2017-11-02 03:42:18.250000)
[DEBUG] Events (raw):
    evt1: 2017-10-30 23:36:34 â†’ 2017-10-30 23:37:51
    evt2: 2017-11-02 01:28:37 â†’ 2017-11-02 01:29:04
    evt3: 2017-11-02 03:20:37 â†’ 2017-11-02 03:22:00
    evt4: 2017-11-02 04:45:29 â†’ 2017-11-02 04:46:28
    evt5: 2017-11-02 07:18:49 â†’ 2017-11-02 07:19:24
    evt6: 2017-11-02 09:02:11 â†’ 2017-11-02 09:03:04
    evt7: 2017-11-02 10:19:12 â†’ 2017-11-02 10:19:51
    evt8: 2017-11-02 13:12:29 â†’ 2017-11-02 13:12:50
[DEBUG] -> 3/8 events overlap the TDMS
[OK] 3/8 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 1.98 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 1.98 GB
[SANITY] event03: ok=True within=True err_start=0.00

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 18\recording 1\Patient 18_1.tdms: 2016-10-03 11:45:47 â†’ 2016-10-05 09:24:37.750000 (lo=2016-10-02 23:45:47, hi=2016-10-05 21:24:37.750000)
[DEBUG] Events (raw):
    evt1: 2016-10-04 14:44:34 â†’ 2016-10-04 14:46:06
    evt2: 2016-10-04 19:35:47 â†’ 2016-10-04 19:45:18
[DEBUG] -> 2/2 events overlap the TDMS
[OK] 2/2 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 2.28 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 2.34 GB
[DEBUG] After cleanup recording 1 RAM-forbrug: 1.71 GB

[INFO] Processing Patient 19 | Enrollment: (none)
[DEBUG] Potential annotation files for patient 19:
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Seizure log ePatch patients with seizures - excluded seizures removed\Patient 19.xlsx
[OK] Found 3 annotation(s) in: E:\ML algoritme tl

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 19\recording 1\Patient 19_1.tdms: 2016-11-15 09:50:01 â†’ 2016-11-17 09:51:09.250000 (lo=2016-11-14 21:50:01, hi=2016-11-17 21:51:09.250000)
[DEBUG] Events (raw):
    evt1: 2016-11-15 14:49:42 â†’ 2016-11-15 14:49:54
    evt2: 2016-11-16 20:55:37 â†’ 2016-11-16 20:56:01
    evt3: 2016-11-17 03:14:11 â†’ 2016-11-17 03:14:20
[DEBUG] -> 3/3 events overlap the TDMS
[OK] 3/3 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 2.40 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 2.43 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 2.48 GB
[DEBUG] After cleanup recording 1 RAM-forbrug: 1.82 GB

[INFO] Processing Patient 20 | Enrollment: (none)
[DEBUG] Potential annotation files for patient 20:
    E:\ML algoritme tl anfaldsdetektion vha H

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 20\recording 1\Patient 20_1.tdms: 2017-03-20 13:12:03 â†’ 2017-03-22 15:19:39 (lo=2017-03-20 01:12:03, hi=2017-03-23 03:19:39)
[DEBUG] Events (raw):
    evt1: 2017-03-21 06:18:12 â†’ 2017-03-21 06:19:10
    evt2: 2017-03-22 03:18:53 â†’ 2017-03-22 03:19:25
    evt3: 2017-03-22 06:39:15 â†’ 2017-03-22 06:40:55
    evt4: 2017-03-23 01:45:54 â†’ 2017-03-23 01:47:08
    evt5: 2017-03-23 04:21:54 â†’ 2017-03-23 04:22:51
[DEBUG] -> 4/5 events overlap the TDMS
[OK] 4/5 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 2.54 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 2.58 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 2.62 GB
[SANITY] event04: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 4 RAM-forbr

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 22\recording 1\Patient 22_1.tdms: 2017-10-09 13:23:52 â†’ 2017-10-11 13:10:21.750000 (lo=2017-10-09 01:23:52, hi=2017-10-12 01:10:21.750000)
[DEBUG] Events (raw):
    evt1: 2017-10-11 13:18:21 â†’ 2017-10-11 13:20:13
    evt2: 2017-10-11 19:02:18 â†’ 2017-10-11 19:09:54
    evt3: 2017-10-12 04:05:42 â†’ 2017-10-12 04:10:05
    evt4: 2017-10-12 14:51:12 â†’ 2017-10-12 14:53:06
[DEBUG] -> 2/4 events overlap the TDMS
[OK] 2/4 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 2.53 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 2.54 GB

== TDMS == Patient 22\recording 2\Patient 22_2.tdms  (patient=22)
[DEBUG] TDMS span Patient 22\recording 2\Patient 22_2.tdms: 2017-10-11 13:11:18 â†’ 2017-10-13 12:25:55.250000 (lo=2017-10-11 01:11:18, hi=2017-10-14 00:25:55.250000)
[DEBUG] Events (raw):
    evt1: 20

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 24\recording 1\Patient 24_1.tdms: 2018-01-29 11:30:46 â†’ 2018-01-31 14:22:12.500000 (lo=2018-01-28 23:30:46, hi=2018-02-01 02:22:12.500000)
[DEBUG] Events (raw):
    evt1: 2018-01-31 04:06:40 â†’ 2018-01-31 04:08:47
    evt2: 2018-01-31 12:50:09 â†’ 2018-01-31 12:53:22
    evt3: 2018-02-01 17:38:00 â†’ 2018-02-01 17:39:34
[DEBUG] -> 2/3 events overlap the TDMS
[OK] 2/3 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 2.83 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 2.88 GB

== TDMS == Patient 24\recording 2\Patient 24_2.tdms  (patient=24)
[DEBUG] TDMS span Patient 24\recording 2\Patient 24_2.tdms: 2018-01-31 14:22:57 â†’ 2018-02-02 09:10:00.001953 (lo=2018-01-31 02:22:57, hi=2018-02-02 21:10:00.001953)
[DEBUG] Events (raw):
    evt1: 2018-01-31 04:06:40 â†’ 2018-01-31 04:08:47
    evt2: 20

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 30\recording 1\Patient 30_1.tdms: 2018-09-04 13:09:13 â†’ 2018-09-06 07:40:44.500000 (lo=2018-09-04 01:09:13, hi=2018-09-06 19:40:44.500000)
[DEBUG] Events (raw):
    evt1: 2018-09-05 03:37:59 â†’ 2018-09-05 03:38:27
    evt2: 2018-09-06 01:44:12 â†’ 2018-09-06 01:44:32
    evt3: 2018-09-06 04:07:42 â†’ 2018-09-06 04:08:02
[DEBUG] -> 3/3 events overlap the TDMS
[OK] 3/3 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 2.97 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 3.02 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 3.06 GB
[DEBUG] After cleanup recording 1 RAM-forbrug: 2.47 GB

[INFO] Processing Patient 32 | Enrollment: (none)
[DEBUG] Potential annotation files for patient 32:
    E:\ML algoritme tl anfaldsdetektion vha H

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 32\recording 1\Patient 32_1.tdms: 2017-05-29 13:58:02 â†’ 2017-05-31 09:47:57 (lo=2017-05-29 01:58:02, hi=2017-05-31 21:47:57)
[DEBUG] Events (raw):
    evt1: 2017-05-30 07:12:36 â†’ 2017-05-30 07:13:38
    evt2: 2017-05-30 15:21:10 â†’ 2017-05-30 15:22:00
    evt3: 2017-05-31 06:45:21 â†’ 2017-05-31 06:45:50
[DEBUG] -> 3/3 events overlap the TDMS
[OK] 3/3 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 3.12 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 3.16 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 3.20 GB
[DEBUG] After cleanup recording 1 RAM-forbrug: 2.60 GB

[INFO] Processing Patient 33 | Enrollment: (none)
[DEBUG] Potential annotation files for patient 33:
    E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 35\recording 1\Patient 35_1.tdms: 2018-02-12 16:35:14 â†’ 2018-02-13 13:01:09.750000 (lo=2018-02-12 04:35:14, hi=2018-02-14 01:01:09.750000)
[DEBUG] Events (raw):
    evt1: 2018-02-13 02:11:09 â†’ 2018-02-13 02:11:58
    evt2: 2018-02-13 03:11:43 â†’ 2018-02-13 03:12:07
    evt3: 2018-02-13 04:35:32 â†’ 2018-02-13 04:36:07
    evt4: 2018-02-13 06:24:05 â†’ 2018-02-13 06:24:43
    evt5: 2018-02-13 07:31:41 â†’ 2018-02-13 07:32:26
[DEBUG] -> 5/5 events overlap the TDMS
[OK] 5/5 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 2.92 GB
[SANITY] event02: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 2 RAM-forbrug: 2.95 GB
[SANITY] event03: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 3 RAM-forbrug: 3.00 GB
[SANITY] event04: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter eve

  res['date'] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True).dt.normalize()


[DEBUG] TDMS span Patient 36\recording 1\Patient 36_1.tdms: 2018-02-19 14:25:18 â†’ 2018-02-22 08:33:58.750000 (lo=2018-02-19 02:25:18, hi=2018-02-22 20:33:58.750000)
[DEBUG] Events (raw):
    evt1: 2018-02-19 20:38:44 â†’ 2018-02-19 20:40:30
[DEBUG] -> 1/1 events overlap the TDMS
[OK] 1/1 annotering(er) overlapper recording 1.
[SANITY] event01: ok=True within=True err_start=0.00 samp, err_end=0.00 samp
[DEBUG] Efter event 1 RAM-forbrug: 3.75 GB

== TDMS == Patient 36\recording 2\Patient 36_2.tdms  (patient=36)
[DEBUG] TDMS span Patient 36\recording 2\Patient 36_2.tdms: 2018-02-22 08:31:43 â†’ 2018-02-23 08:18:01.500000 (lo=2018-02-21 20:31:43, hi=2018-02-23 20:18:01.500000)
[DEBUG] Events (raw):
    evt1: 2018-02-19 20:38:44 â†’ 2018-02-19 20:40:30
[DEBUG] -> 0/1 events overlap the TDMS
[INFO] 1 annotering(er) fundet, men ingen overlapper denne recording.
[DEBUG] After cleanup recording 1 RAM-forbrug: 2.84 GB

[INFO] Processing Patient 38 | Enrollment: enrollment a
[DEBUG] Potential a

In [None]:
# Patients that were missing because of seizures excluded from the
# annotations; they are run separately here.
# NOTE(review): all three ids appear in patients_responders_numbers - confirm
# that OUTPUT_ROOT points at the responders output folder before running.
missing_patients = ["Patient 6", "Patient 10", "Patient 29"]

# Each label is passed positionally, i.e. as run_for_patient's patient_selector.
for i in missing_patients:
    run_for_patient(i)