In [19]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nptdms import TdmsFile


# Root folder of the original ePatch recordings (the same path is used as the
# batch input root further down in this notebook).
ePatch_path_old = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Patients ePatch data"
# Root folder that receives the corrected (trimmed) recordings (the same path is
# used as the batch output root further down in this notebook).
ePatch_path_new = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\ePatch data"

In [67]:
import re
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
from nptdms import TdmsFile, TdmsWriter, RootObject, GroupObject, ChannelObject


# ---------- Hjælpefunktioner ----------

def parse_start_from_name(root_name: str) -> datetime | None:
    """
    Forsøger at finde starttid i 'name'-feltet.
    Understøtter fx:
      ..._22_2_2016_kl_110414__24_2_2016_kl_16.tdms
      ..._22_02_2016_kl_110414__24_02_2016_kl_160000.tdms
      ..._22022016_kl_110414__24022016_kl_16.tdms  (går også)
    Returnerer datetime (naiv, lokal) eller None hvis ikke fundet.
    """
    s = root_name

    # 1) D_M_Y + kl_HHMMSS (sekunder valgfrie)
    m = re.search(r'(\d{1,2})_(\d{1,2})_(\d{4})_kl_(\d{1,2})(\d{2})?(\d{2})?', s)
    if m:
        d, M, y = map(int, m.group(1, 2, 3))
        H = int(m.group(4))
        mnt = int(m.group(5)) if m.group(5) else 0
        sec = int(m.group(6)) if m.group(6) else 0
        return datetime(y, M, d, H, mnt, sec)

    # 2) DDMMYYYY + kl_HHMMSS (sekunder valgfrie)
    m = re.search(r'(\d{2})(\d{2})(\d{4})_kl_(\d{1,2})(\d{2})?(\d{2})?', s)
    if m:
        d, M, y = int(m.group(1)), int(m.group(2)), int(m.group(3))
        H = int(m.group(4))
        mnt = int(m.group(5)) if m.group(5) else 0
        sec = int(m.group(6)) if m.group(6) else 0
        return datetime(y, M, d, H, mnt, sec)

    return None


def pick_channel(td: TdmsFile, preferred=("Untitled", "EKG")):
    """
    Select the channel to read from an opened TDMS file.

    The ``preferred`` (group, channel) pair is tried first. If that lookup
    fails for any reason, the first channel of the first group that has at
    least one channel is used instead.

    Returns:
        Tuple ``(group_name, channel_name, channel_obj)``.

    Raises:
        RuntimeError: if no group in the file contains a channel.
    """
    try:
        wanted = td[preferred[0]][preferred[1]]
    except Exception:
        # Preferred pair not available: fall through to the generic search.
        pass
    else:
        return preferred[0], preferred[1], wanted

    for group in td.groups():
        available = group.channels()
        if not available:
            continue
        first = available[0]
        return group.name, first.name, first

    raise RuntimeError("Ingen kanaler fundet i TDMS-filen.")


def build_channel_properties(
    orig_props: dict,
    new_start_time: datetime,
    wf_increment: float,
    channel_name: str = "ECG",
) -> dict:
    """
    Build the property dict for the output channel.

    A fixed set of descriptive properties is carried over from the source
    channel when present; the waveform timing properties and the channel name
    are always overwritten.

    Args:
        orig_props: Properties of the source channel (not modified).
        new_start_time: Absolute time of the first sample that is kept.
        wf_increment: Sampling interval in seconds per sample.
        channel_name: Value stored in ``NI_ChannelName``. Defaults to "ECG",
            which is what this function previously always wrote.

    Returns:
        A new dict with the carried-over properties, defaults for
        ``unit_string`` ("uV"), ``wf_xname`` ("Time") and ``wf_xunit_string``
        ("s") when the source lacks them, plus ``wf_increment``,
        ``wf_start_time``, ``wf_start_offset`` (0.0) and ``NI_ChannelName``.
    """
    keep_keys = (
        "unit_string", "wf_xname", "wf_xunit_string",
        "NI_UnitDescription", "NI_ChannelName", "PreFilter", "TransducerType",
    )

    # Carry over whatever descriptive fields the source channel has.
    props = {key: orig_props[key] for key in keep_keys if key in orig_props}

    # Defaults for fields the source channel did not provide.
    props.setdefault("unit_string", "uV")
    props.setdefault("wf_xname", "Time")
    props.setdefault("wf_xunit_string", "s")

    # Timing fields are always forced to the new values.
    props["wf_increment"] = float(wf_increment)   # seconds per sample
    props["wf_start_time"] = new_start_time       # absolute start
    props["wf_start_offset"] = 0.0                # data was trimmed from the start

    # Name the channel explicitly (overrides any carried-over name).
    props["NI_ChannelName"] = channel_name

    return props


# ---------- Hovedfunktion ----------

def rewrite_tdms(
    src_tdms_path: str | Path,
    dst_dir: str | Path,
    out_group: str = "Recording",
    out_channel: str = "ECG",
    trim_minutes: int = 20,
):
    """
    Trim one TDMS recording at both ends and write it as a new TDMS file.

    The first and last ``trim_minutes`` of the selected channel are dropped
    and the remainder is written to ``<dst_dir>/<source stem>_trimmed.tdms``
    as a single channel ``out_group/out_channel``. A one-row summary CSV is
    written next to it (``<source stem>_trimmed.summary.csv``).

    The start time is taken from the timestamp in the root ``name`` property
    (falling back to the file name when that property is missing). If no
    timestamp can be parsed, the channel's ``wf_start_time`` is used, and
    1970-01-01 if that is missing as well. The written ``wf_start_time`` is
    that start time shifted by the duration of the samples removed in front.

    Args:
        src_tdms_path: Source TDMS file.
        dst_dir: Output directory (created if needed).
        out_group: Group name in the output file.
        out_channel: Channel name in the output file; also stored in the
            channel's ``NI_ChannelName`` property.
        trim_minutes: Minutes removed from each end; must not be negative.

    Raises:
        ValueError: if ``trim_minutes`` is negative, the channel has no
            positive ``wf_increment``, or the recording is too short to trim.
        RuntimeError: propagated from ``pick_channel`` when the file has no
            channels.
    """
    if trim_minutes < 0:
        raise ValueError(f"trim_minutes må ikke være negativ (fik {trim_minutes}).")

    src_tdms_path = Path(src_tdms_path)
    dst_dir = Path(dst_dir)
    dst_dir.mkdir(parents=True, exist_ok=True)

    # Read metadata only (cheap); the data itself is read further down.
    md = TdmsFile.read_metadata(src_tdms_path)

    # Root name -> start time encoded in the name, if any.
    root_name = md.properties.get("name", src_tdms_path.name)
    parsed_t0 = parse_start_from_name(str(root_name))

    with TdmsFile.open(src_tdms_path) as td:
        in_group, in_channel, ch = pick_channel(td)

        # Sampling interval / rate.
        inc = ch.properties.get("wf_increment", None)
        if inc is None or float(inc) <= 0:
            raise ValueError("Kunne ikke finde 'wf_increment' (samplingsinterval) i kanalens properties.")
        inc = float(inc)
        fs = 1.0 / inc

        # Start time: name timestamp first, then channel metadata, then epoch.
        t0_meta = ch.properties.get("wf_start_time", None)
        if parsed_t0 is not None:
            new_start = parsed_t0
        elif t0_meta is not None and not pd.isna(pd.Timestamp(t0_meta)):
            # npTDMS hands timestamps back as numpy.datetime64, which has no
            # .isoformat(); convert to a plain Python datetime first.
            new_start = pd.Timestamp(t0_meta).to_pydatetime()
        else:
            new_start = datetime(1970, 1, 1)

        # Trim parameters.
        n = len(ch)
        trim_n = int(round(trim_minutes * 60 * fs))
        if n <= 2 * trim_n:
            raise ValueError(f"Filen er for kort ({n} samples) til at fjerne {trim_minutes} min i begge ender ved fs={fs:.3f} Hz.")

        start_idx = trim_n
        end_idx = n - trim_n  # exclusive
        new_len = end_idx - start_idx

        # Shift the start by the samples actually dropped, so the timestamp
        # matches the data even when the trim is not a whole number of samples.
        new_start_adj = new_start + timedelta(seconds=trim_n * inc)

        # Output path.
        out_name = src_tdms_path.stem + "_trimmed.tdms"
        dst_path = dst_dir / out_name

        # Root properties: copy of the source root with the new name.
        out_root_props = dict(md.properties)
        out_root_props["name"] = out_name
        out_root_props["registertxt1"] = "Written by Python (npTDMS)"

        # Channel properties: keep the useful originals, force the timing.
        chan_props = build_channel_properties(ch.properties, new_start_adj, inc)
        # Keep the stored channel-name property in step with the channel name.
        chan_props["NI_ChannelName"] = out_channel

        # The whole trimmed slice is read into memory in one go.
        out_data = np.asarray(ch[start_idx:end_idx])

        # Write the new TDMS file.
        with TdmsWriter(dst_path) as writer:
            root_obj = RootObject(properties=out_root_props)
            group_obj = GroupObject(out_group, properties={})
            chan_obj = ChannelObject(out_group, out_channel, out_data, properties=chan_props)
            writer.write_segment([root_obj, group_obj, chan_obj])

        print(f"OK → {dst_path}")
        print(f"  Input:  group='{in_group}', channel='{in_channel}', fs={fs:.3f} Hz, n={n}")
        print(f"  Output: group='{out_group}', channel='{out_channel}', n={new_len}")
        print(f"  New wf_start_time: {new_start_adj.isoformat()}")

        # Small metadata summary written next to the output file.
        meta = {
            "input_file": src_tdms_path.name,
            "output_file": out_name,
            "fs_Hz": fs,
            "n_in": n,
            "n_out": new_len,
            "trim_minutes_each_end": trim_minutes,
            "new_start_iso": new_start_adj.isoformat(),
            "root_name_parsed": root_name,
        }
        pd.DataFrame([meta]).to_csv(dst_path.with_suffix(".summary.csv"), index=False)


# ---------- Brug: udfyld dine stier herunder ----------

if __name__ == "__main__":
    # Single-file run: trim 20 minutes off both ends of one Patient 1 recording
    # and write the result into the corrected-data folder for that patient.
    src = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Patients ePatch data\Patient 1\recording 1\Patient_1_1.tdms"
    dst = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\ePatch data\Patient 1"
    rewrite_tdms(src, dst, out_group="Recording", out_channel="ECG", trim_minutes=20)


OK → E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\ePatch data\Patient 1\Patient_1_1_trimmed.tdms
  Input:  group='Untitled', channel='EKG', fs=512.000 Hz, n=97861504
  Output: group='Recording', channel='ECG', n=96632704
  New wf_start_time: 2016-02-22T11:24:14


In [74]:
import os
import re
import traceback
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
from nptdms import TdmsFile, TdmsWriter, RootObject, GroupObject, ChannelObject


# -------- Parse starttid fra root.name --------
def parse_start_from_name(root_name: str) -> datetime | None:
    s = root_name
    m = re.search(r'(\d{1,2})_(\d{1,2})_(\d{4})_kl_(\d{1,2})(\d{2})?(\d{2})?', s)
    if m:
        d, M, y = map(int, m.group(1, 2, 3))
        H = int(m.group(4))
        mnt = int(m.group(5)) if m.group(5) else 0
        sec = int(m.group(6)) if m.group(6) else 0
        return datetime(y, M, d, H, mnt, sec)
    m = re.search(r'(\d{2})(\d{2})(\d{4})_kl_(\d{1,2})(\d{2})?(\d{2})?', s)
    if m:
        d, M, y = int(m.group(1)), int(m.group(2)), int(m.group(3))
        H = int(m.group(4))
        mnt = int(m.group(5)) if m.group(5) else 0
        sec = int(m.group(6)) if m.group(6) else 0
        return datetime(y, M, d, H, mnt, sec)
    return None


# -------- Ekstraher patient-ID fra sti/filnavn --------
def extract_patient_id_from_path(p: Path) -> int | None:
    # kig i hver path-del og til sidst fil-stem
    candidates = list(p.parts) + [p.stem]
    for part in candidates:
        m = re.search(r'Patient[ _-]?(\d+)', part, flags=re.IGNORECASE)
        if m:
            try:
                return int(m.group(1))
            except ValueError:
                pass
    return None


# -------- Vælg kanal --------
def pick_channel(td: TdmsFile, preferred=("Untitled", "EKG")):
    """
    Select the channel to read from an opened TDMS file.

    Tries the ``preferred`` (group, channel) pair first; if that lookup fails
    for any reason, returns the first channel of the first group that has one.

    Returns:
        Tuple ``(group_name, channel_name, channel_obj)``.

    Raises:
        RuntimeError: if no group in the file contains a channel.
    """
    try:
        wanted = td[preferred[0]][preferred[1]]
    except Exception:
        # Preferred pair not available: fall through to the generic search.
        pass
    else:
        return preferred[0], preferred[1], wanted

    for group in td.groups():
        available = group.channels()
        if not available:
            continue
        first = available[0]
        return group.name, first.name, first

    raise RuntimeError("Ingen kanaler fundet i TDMS-filen.")


# -------- Opbyg kanal-properties til output --------
def build_channel_properties(
    orig_props: dict,
    new_start_time: datetime,
    wf_increment: float,
    channel_name: str = "ECG",
) -> dict:
    """
    Build the property dict for the output channel.

    Carries over a fixed set of descriptive properties from the source
    channel when present, fills defaults for ``unit_string`` ("uV"),
    ``wf_xname`` ("Time") and ``wf_xunit_string`` ("s"), and always overwrites
    ``wf_increment``, ``wf_start_time``, ``wf_start_offset`` (0.0) and
    ``NI_ChannelName``.

    Args:
        orig_props: Properties of the source channel (not modified).
        new_start_time: Absolute time of the first sample that is kept.
        wf_increment: Sampling interval; converted with ``float()``.
        channel_name: Value stored in ``NI_ChannelName``. Defaults to "ECG",
            which is what this function previously always wrote.

    Returns:
        A new dict of channel properties.
    """
    keep_keys = (
        "unit_string", "wf_xname", "wf_xunit_string",
        "NI_UnitDescription", "NI_ChannelName", "PreFilter", "TransducerType",
    )
    props = {key: orig_props[key] for key in keep_keys if key in orig_props}

    # Defaults only apply when the source channel lacks the field.
    props.setdefault("unit_string", "uV")
    props.setdefault("wf_xname", "Time")
    props.setdefault("wf_xunit_string", "s")

    # Timing and name are always forced to the new values.
    props["wf_increment"] = float(wf_increment)
    props["wf_start_time"] = new_start_time
    props["wf_start_offset"] = 0.0
    props["NI_ChannelName"] = channel_name
    return props


# -------- Læs -> trim -> skriv én fil --------
def rewrite_tdms(
    src_tdms_path: Path,
    dst_dir: Path,
    out_group: str = "Recording",
    out_channel: str = "ECG",
    trim_minutes: int = 20,
):
    """
    Read one TDMS recording, trim both ends, and write it as a new TDMS file.

    The first and last ``trim_minutes`` of the selected channel are dropped
    and the remainder is written to ``<dst_dir>/<source stem>_trimmed.tdms``
    as a single channel ``out_group/out_channel``. The data is first written
    to a ``.part`` file and renamed on success, so a failed or interrupted
    run never leaves a truncated ``_trimmed.tdms`` behind.

    The start time is taken from the timestamp in the root ``name`` property
    (falling back to the file name when that property is missing), then from
    the channel's ``wf_start_time``, then 1970-01-01. The written
    ``wf_start_time`` is that start time shifted by the duration of the
    samples removed in front.

    Args:
        src_tdms_path: Source TDMS file (``str`` is accepted as well).
        dst_dir: Output directory (created if needed; ``str`` accepted).
        out_group: Group name in the output file.
        out_channel: Channel name in the output file; also stored in the
            channel's ``NI_ChannelName`` property.
        trim_minutes: Minutes removed from each end; must not be negative.

    Returns:
        A dict describing the conversion (input/output paths, sampling rate,
        sample counts, group/channel names, new start time, status).

    Raises:
        ValueError: if ``trim_minutes`` is negative, the channel has no
            positive ``wf_increment``, or the recording is too short to trim.
        RuntimeError: propagated from ``pick_channel`` when the file has no
            channels.
    """
    if trim_minutes < 0:
        raise ValueError(f"trim_minutes må ikke være negativ (fik {trim_minutes}).")

    # Accept plain strings too; the code below relies on Path attributes.
    src_tdms_path = Path(src_tdms_path)
    dst_dir = Path(dst_dir)

    md = TdmsFile.read_metadata(src_tdms_path)
    root_name = md.properties.get("name", src_tdms_path.name)
    parsed_t0 = parse_start_from_name(str(root_name))

    with TdmsFile.open(src_tdms_path) as td:
        in_group, in_channel, ch = pick_channel(td)

        inc = ch.properties.get("wf_increment", None)
        if inc is None or float(inc) <= 0:
            raise ValueError("Kan ikke finde gyldig 'wf_increment' i kanalens properties.")
        inc = float(inc)
        fs = 1.0 / inc

        # Start time: name timestamp first, then channel metadata, then epoch.
        t0_meta = ch.properties.get("wf_start_time", None)
        if parsed_t0 is not None:
            new_start = parsed_t0
        elif t0_meta is not None and not pd.isna(pd.Timestamp(t0_meta)):
            # npTDMS hands timestamps back as numpy.datetime64, which has no
            # .isoformat(); convert to a plain Python datetime first.
            new_start = pd.Timestamp(t0_meta).to_pydatetime()
        else:
            new_start = datetime(1970, 1, 1)

        n = len(ch)
        trim_n = int(round(trim_minutes * 60 * fs))
        if n <= 2 * trim_n:
            raise ValueError(f"For få samples ({n}) ift. trim {trim_minutes} min @ fs={fs:.3f} Hz.")

        start_idx = trim_n
        end_idx = n - trim_n  # exclusive
        new_len = end_idx - start_idx
        # Shift the start by the samples actually dropped.
        new_start_adj = new_start + timedelta(seconds=trim_n * inc)

        dst_dir.mkdir(parents=True, exist_ok=True)
        out_name = src_tdms_path.stem + "_trimmed.tdms"
        dst_path = dst_dir / out_name
        tmp_path = dst_path.with_name(out_name + ".part")

        out_root_props = dict(md.properties)
        out_root_props["name"] = out_name
        out_root_props["registertxt1"] = "Written by Python (npTDMS)"

        chan_props = build_channel_properties(ch.properties, new_start_adj, inc)
        # Keep the stored channel-name property in step with the channel name.
        chan_props["NI_ChannelName"] = out_channel

        # The whole trimmed slice is read into memory in one go.
        out_data = np.asarray(ch[start_idx:end_idx])

        try:
            with TdmsWriter(tmp_path) as writer:
                root_obj = RootObject(properties=out_root_props)
                group_obj = GroupObject(out_group, properties={})
                chan_obj = ChannelObject(out_group, out_channel, out_data, properties=chan_props)
                writer.write_segment([root_obj, group_obj, chan_obj])
            # Only a completely written file gets the final name.
            tmp_path.replace(dst_path)
        except BaseException:
            # Remove the partial file (also on Ctrl-C), then re-raise.
            tmp_path.unlink(missing_ok=True)
            raise

        return {
            "input": str(src_tdms_path),
            "output": str(dst_path),
            "fs_Hz": fs,
            "n_in": n,
            "n_out": new_len,
            "group_in": in_group,
            "channel_in": in_channel,
            "group_out": out_group,
            "channel_out": out_channel,
            "new_start_iso": new_start_adj.isoformat(),
            "status": "ok",
            "error": "",
        }


# -------- Batch med Excel-filter + spejlet struktur --------
def batch_rewrite_tdms_filtered(
    input_root: str | Path,
    output_root: str | Path,
    excel_path: str | Path,
    excel_col: str | None = None,   # None -> use the first column
    trim_minutes: int = 20,
    out_group: str = "Recording",
    out_channel: str = "ECG",
    skip_existing: bool = True,
):
    """
    Trim every TDMS file under ``input_root`` that belongs to a listed patient.

    Patient numbers are read from one column of an Excel sheet. Every
    ``*.tdms`` file below ``input_root`` whose path contains one of those
    patient numbers is passed to ``rewrite_tdms``; the output mirrors the
    input folder structure below ``output_root``. Files are processed in
    sorted path order. A failure in one file is recorded and does not stop
    the batch.

    Args:
        input_root: Folder searched recursively for ``*.tdms`` files.
        output_root: Root of the mirrored output tree.
        excel_path: Excel file holding the allowed patient numbers.
        excel_col: Column with the patient numbers; first column if ``None``.
        trim_minutes: Passed through to ``rewrite_tdms``.
        out_group: Passed through to ``rewrite_tdms``.
        out_channel: Passed through to ``rewrite_tdms``.
        skip_existing: Skip files whose output file already exists.

    Returns:
        A DataFrame with one row per selected file (status "ok",
        "skipped_exists" or "error"); the same table is saved as
        ``tdms_batch_summary_filtered.csv`` in ``output_root``.

    Raises:
        KeyError: if ``excel_col`` is not a column of the Excel sheet.
    """
    input_root = Path(input_root)
    output_root = Path(output_root)

    # --- read the allowed patient IDs from Excel ---
    df_ids = pd.read_excel(excel_path)
    if excel_col is None:
        series = df_ids.iloc[:, 0]  # first column
    else:
        if excel_col not in df_ids.columns:
            raise KeyError(
                f"Kolonnen '{excel_col}' findes ikke i {excel_path}. "
                f"Tilgængelige kolonner: {list(df_ids.columns)}"
            )
        series = df_ids[excel_col]

    # Non-numeric cells (notes, headers repeated in the sheet, ...) are dropped
    # instead of aborting the whole batch, but the count is reported.
    numeric_ids = pd.to_numeric(series, errors="coerce")
    n_unparsed = int(numeric_ids.isna().sum() - series.isna().sum())
    if n_unparsed > 0:
        print(f"Advarsel: {n_unparsed} celle(r) i Excel-kolonnen er ikke tal og blev ignoreret.")
    allowed_patients = {int(x) for x in numeric_ids.dropna().tolist()}
    print(f"Patient-ID'er fra Excel: {sorted(allowed_patients)}")

    # --- find all TDMS files and filter on the patient ID in the path ---
    # Sorted so the processing order and the summary are reproducible.
    all_tdms = sorted(
        Path(root) / f
        for root, _, files in os.walk(input_root)
        for f in files if f.lower().endswith(".tdms")
    )
    print(f"Fandt {len(all_tdms)} TDMS-filer i alt under: {input_root}")

    filtered = []
    skipped_patient = []
    for p in all_tdms:
        pid = extract_patient_id_from_path(p)
        if pid is None or pid not in allowed_patients:
            skipped_patient.append(p)
            continue
        filtered.append(p)
    print(f"Udvalgt {len(filtered)} filer baseret på Excel-listen ({len(skipped_patient)} fravalgt).")

    # --- process the selected files ---
    results = []
    for i, src in enumerate(filtered, 1):
        # Mirror the folder structure of the input below output_root.
        rel = src.relative_to(input_root).parent
        dst_dir = output_root / rel
        out_name = src.stem + "_trimmed.tdms"
        out_path = dst_dir / out_name

        if skip_existing and out_path.exists():
            results.append({
                "input": str(src), "output": str(out_path), "status": "skipped_exists", "error": ""
            })
            continue

        try:
            rec = rewrite_tdms(
                src_tdms_path=src,
                dst_dir=dst_dir,
                out_group=out_group,
                out_channel=out_channel,
                trim_minutes=trim_minutes,
            )
            results.append(rec)
            print(f"[{i}/{len(filtered)}] OK -> {out_path}")
        except Exception as e:
            # Best-effort batch: record the failure and continue with the next file.
            msg = "".join(traceback.format_exception_only(type(e), e)).strip()
            results.append({
                "input": str(src), "output": str(out_path),
                "status": "error", "error": msg
            })
            print(f"[{i}/{len(filtered)}] FEJL i {src}: {msg}")

    # --- save the summary ---
    output_root.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(results)
    summary_csv = output_root / "tdms_batch_summary_filtered.csv"
    df.to_csv(summary_csv, index=False)
    print(f"\nBatch færdig. Summary: {summary_csv}")
    return df


# -------- Kørselseksempel --------
if __name__ == "__main__":
    # Batch run: input tree, mirrored output tree, and the Excel sheet that
    # lists which patient numbers to include.
    INPUT_ROOT = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Patients ePatch data"
    OUTPUT_ROOT = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\ePatch data"
    EXCEL_IDS  = r"E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\Patient seizures.xlsx"

    # Set excel_col to the exact column name if desired (otherwise the first column is used)
    batch_rewrite_tdms_filtered(
        input_root=INPUT_ROOT,
        output_root=OUTPUT_ROOT,
        excel_path=EXCEL_IDS,
        excel_col="Patient number (as given to Switzerland)",
        trim_minutes=20,
        out_group="Recording",
        out_channel="ECG",
        skip_existing=True,
    )




Patient-ID'er fra Excel: [3, 5, 6, 8, 10, 14, 15, 16, 21, 23, 27, 28, 29, 31, 34, 37, 39, 40, 41, 42]
Fandt 72 TDMS-filer i alt under: E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Patients ePatch data
Udvalgt 37 filer baseret på Excel-listen (35 fravalgt).
[1/37] FEJL i E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus to Lausanne\Patients ePatch data\Patient 10\recording 1\Patient 10_1.tdms: RuntimeError: Ingen kanaler fundet i TDMS-filen.
[2/37] OK -> E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\ePatch data\Patient 14\recording 1\Patient 14_1_trimmed.tdms
[3/37] OK -> E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\ePatch data\Patient 14\recording 2\Patient 14_2_trimmed.tdms
[4/37] OK -> E:\ML algoritme tl anfaldsdetektion vha HRV\ePatch data from Aarhus - Corrected\ePatch data\Patient 15\recording 1\Patient 15_1_trimmed.tdms
[5/37] OK -> E:\ML algoritme tl anfaldsd