In [None]:
import shutil
import random
from pathlib import Path
from typing import List, Dict, Optional

import pandas as pd


# ======================
#       CONFIG
# ======================

# Dossier racine de ta grande TUSZ (celle de ~70 Go, avec train/eval/test)
BIG_ROOT = Path(r"edf/")

# Dossier o√π sera cr√©√©e la petite TUSZ √âQUILIBR√âE (UNE SEULE dataset)
SMALL_ROOT = Path(r"edf_reduit")

# Splits √† scanner dans la grosse base
SPLITS = ["train", "eval", "test"]   # adapte si tu n'as pas les trois

# Nombre TOTAL d'enregistrements souhait√©s dans la petite dataset
# Exemple: 200 => ~100 avec crise + ~100 sans crise au total
TARGET_TOTAL_RECORDINGS = 200

# Optionnel : ne garder qu'un type de montage (ex: "01_tcp_ar"), sinon None
ONLY_MONTAGE_DIR = "01_tcp_ar"      # ou None pour tous les montages

# Pour reproductibilit√©
RANDOM_SEED = 42
random.seed(RANDOM_SEED)


# ======================
#   FONCTIONS UTILES
# ======================

def has_seizure(csv_path: Path) -> bool:
    """
    Retourne True si le fichier csv_bi/csv contient au moins une √©tiquette 'seiz'.
    """
    try:
        df = pd.read_csv(csv_path, comment="#")
    except Exception as e:
        print("[WARN] Impossible de lire %s: %s" % (csv_path, e))
        return False

    if "label" not in df.columns:
        print("[WARN] colonne 'label' absente dans %s, consid√©r√© comme non-seizure." % csv_path)
        return False

    mask = df["label"].astype(str).str.contains("seiz", case=False)
    return bool(mask.any())


def find_annotation_file(edf_path: Path) -> Optional[Path]:
    """
    Trouve le fichier d'annotation correspondant √† un EDF :
    priorit√© .csv_bi, sinon .csv. Retourne None si rien trouv√©.
    """
    csv_bi = edf_path.with_suffix(".csv_bi")
    if csv_bi.exists():
        return csv_bi

    csv = edf_path.with_suffix(".csv")
    if csv.exists():
        return csv

    return None


def matches_montage_filter(edf_path: Path) -> bool:
    """
    Si ONLY_MONTAGE_DIR est d√©fini (ex: "01_tcp_ar"),
    ne garde que les EDF dont le chemin contient ce dossier.
    Sinon, accepte tout.
    """
    if ONLY_MONTAGE_DIR is None:
        return True
    return ONLY_MONTAGE_DIR in edf_path.parts


def copy_recording_to_single_dataset(edf_path: Path,
                                     ann_path: Path,
                                     big_split_dir: Path,
                                     small_root: Path) -> None:
    """
    Copie l'EDF et son annotation dans small_root en supprimant le niveau 'train/eval/test'.
    Exemple :
      BIG_ROOT/train/aaaaaacz/s003_2010/01_tcp_ar/xxx.edf
    devient :
      SMALL_ROOT/aaaaaacz/s003_2010/01_tcp_ar/xxx.edf
    """
    # Chemin relatif √† partir du dossier de split (train / eval / test)
    rel_edf = edf_path.relative_to(big_split_dir)
    dest_edf = small_root / rel_edf
    dest_edf.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(edf_path, dest_edf)

    rel_ann = ann_path.relative_to(big_split_dir)
    dest_ann = small_root / rel_ann
    dest_ann.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(ann_path, dest_ann)


# ======================
#          MAIN
# ======================

def main():
    if not BIG_ROOT.exists():
        raise FileNotFoundError("BIG_ROOT introuvable : %s" % BIG_ROOT)

    SMALL_ROOT.mkdir(parents=True, exist_ok=True)

    # On agr√®ge TOUTES les infos (train + eval + test)
    seiz_recs = []      # liste de dicts {"edf": Path, "ann": Path, "split": str}
    nonseiz_recs = []

    # 1) Scanner tous les splits
    for split in SPLITS:
        split_dir = BIG_ROOT / split
        if not split_dir.exists():
            print("[INFO] Split %s introuvable, on saute." % split)
            continue

        print("\n===== SCAN SPLIT : %s =====" % split)
        edf_files = sorted(split_dir.rglob("*.edf"))
        print("  %d fichiers EDF trouv√©s dans %s." % (len(edf_files), split))

        for edf_path in edf_files:
            if not matches_montage_filter(edf_path):
                continue

            ann_path = find_annotation_file(edf_path)
            if ann_path is None:
                print("[WARN] Pas de csv_bi/csv pour %s, on ignore." % edf_path)
                continue

            rec_info = {"edf": edf_path, "ann": ann_path, "split": split}

            if has_seizure(ann_path):
                seiz_recs.append(rec_info)
            else:
                nonseiz_recs.append(rec_info)

    # 2) R√©sum√© global
    print("\n===== R√âSUM√â GLOBAL =====")
    print("  Total enregistrements avec crise   :", len(seiz_recs))
    print("  Total enregistrements sans crise   :", len(nonseiz_recs))

    if len(seiz_recs) == 0 or len(nonseiz_recs) == 0:
        print("[ERREUR] Impossible d'√©quilibrer : une des classes est vide.")
        return

    if TARGET_TOTAL_RECORDINGS is None:
        max_per_class_from_target = min(len(seiz_recs), len(nonseiz_recs))
    else:
        max_per_class_from_target = TARGET_TOTAL_RECORDINGS // 2

    per_class = min(len(seiz_recs), len(nonseiz_recs), max_per_class_from_target)

    if per_class == 0:
        print("[ERREUR] per_class = 0 (TARGET_TOTAL_RECORDINGS trop petit ou trop peu de donn√©es).")
        return

    print("  => On va construire une petite dataset UNIQUE avec :")
    print("       %d enregistrements avec crise" % per_class)
    print("       %d enregistrements sans crise" % per_class)
    print("     soit un total ‚âà %d enregistrements." % (2 * per_class))

    # 3) √âchantillonnage √©quilibr√©
    seiz_selected = random.sample(seiz_recs, per_class)
    nonseiz_selected = random.sample(nonseiz_recs, per_class)

    # 4) Copie dans la petite dataset (sans niveau train/eval/test)
    print("\n===== COPIE VERS DATASET UNIQUE =====")
    for rec in seiz_selected + nonseiz_selected:
        edf_path = rec["edf"]
        ann_path = rec["ann"]
        split = rec["split"]
        split_dir = BIG_ROOT / split
        copy_recording_to_single_dataset(edf_path, ann_path, split_dir, SMALL_ROOT)

    print("\n=== Termin√© ===")
    print("Petite dataset √©quilibr√©e (seizure / non-seizure) cr√©√©e dans :")
    print("  %s" % SMALL_ROOT)


if __name__ == "__main__":
    main()


In [1]:
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple, Optional, Dict, Any

import numpy as np
import pandas as pd
import mne
from scipy.signal import stft


# ======================
#      CONFIG GLOBALE
# ======================

@dataclass
class Config:
    # Chemins
    raw_root: Path          # dossier racine de la NOUVELLE dataset r√©duite (sans train/eval/test)
    prepared_root: Path     # dossier de sortie (contenant prepared/)
    split: str = "train"    # juste un nom logique pour le sous-dossier de sortie

    # EEG
    montage: Optional[str] = None   # on n'essaie plus d'appliquer "TCP_AR" de MNE
    l_freq: float = 0.5
    h_freq: float = 40.0
    notch_freq: Optional[float] = 50.0
    sfreq_target: Optional[float] = 256.0  # resampling

    # Fen√™tres temporelles
    win_size: float = 4.0    # secondes
    hop_size: float = 2.0    # secondes (overlap 50% si hop = win/2)

    # Intervalles pr√©-/inter-ictaux
    preictal_dur: float = 120.0
    inter_tampon: float = 60.0   # tampon APR√àS la derni√®re crise pour l'interictal
    min_preictal: float = 60.0   # si onset < 60s ‚Üí pas de pr√©-ictal exploitable

    # Spectrogrammes
    nperseg: int = 256
    noverlap: int = 128
    fmin_spec: float = 0.5
    fmax_spec: float = 40.0

    # S√©quences optionnelles
    make_sequences: bool = True
    seq_len: int = 10

    # Filtrage patients / sessions (optionnel)
    patients_whitelist: Optional[List[str]] = None   # ex: ["aaaaaacz", "aaaaaaju"]
    sessions_whitelist: Optional[List[str]] = None   # ex: ["s003_2010", "s005_2010"]


Interval = Tuple[float, float]

# ======================
#   MONTAGE √Ä GARDER
# ======================

ONLY_MONTAGE_DIR = "01_tcp_ar"


def get_patient_session_from_path(edf_path: Path) -> Tuple[str, str]:
    """
    Extrait patient et session √† partir du chemin TUSZ.
    Pour la dataset r√©duite :
      .../data_small_one/aaaaaacz/s003_2010/01_tcp_ar/aaaaaacz_s003_t000.edf
      -> patient = aaaaaacz, session = s003_2010
    """
    parts = edf_path.parts
    if len(parts) < 4:
        return ("unknown", "unknown")
    patient = parts[-4]
    session = parts[-3]
    return patient, session


def is_01_tcp_ar_recording(edf_path: Path, cfg: Config) -> bool:
    """
    True si le fichier EDF :
      - appartient √† un dossier de montage 01_tcp_ar
      - et respecte √©ventuellement les whitelists patient/session.
    """
    parts = edf_path.parts
    if ONLY_MONTAGE_DIR not in parts:
        return False

    # R√©cup√©rer patient et session
    patient, session = get_patient_session_from_path(edf_path)

    # Filtre patient
    if cfg.patients_whitelist is not None:
        if patient not in cfg.patients_whitelist:
            return False

    # Filtre session
    if cfg.sessions_whitelist is not None:
        if session not in cfg.sessions_whitelist:
            return False

    return True


# ======================
#   OUTILS SUR LES INTERVALLES
# ======================

def merge_intervals(intervals: List[Interval]) -> List[Interval]:
    """Fusionne des intervalles qui se chevauchent."""
    if not intervals:
        return []
    intervals = sorted(intervals, key=lambda x: x[0])
    merged = [list(intervals[0])]
    for s, e in intervals[1:]:
        last_s, last_e = merged[-1]
        if s <= last_e:
            merged[-1][1] = max(last_e, e)
        else:
            merged.append([s, e])
    return [(s, e) for s, e in merged]


def is_inside_intervals(start: float, end: float, intervals: List[Interval]) -> bool:
    """Vrai si [start, end] est enti√®rement inclus dans au moins un intervalle."""
    for s, e in intervals:
        if start >= s and end <= e:
            return True
    return False


# ======================
#       LECTURE CSV_BI
# ======================

def read_seizure_intervals(csv_bi_path: Path) -> List[Interval]:
    """
    Lit le fichier csv_bi global et renvoie la liste des intervalles de crise (seiz).
    """
    df = pd.read_csv(csv_bi_path, comment="#")
    if "label" not in df.columns or "start_time" not in df.columns or "stop_time" not in df.columns:
        raise ValueError(f"Colonnes manquantes dans {csv_bi_path}")
    mask = df["label"].astype(str).str.contains("seiz", case=False)
    seizures = df[mask]
    return [(float(row.start_time), float(row.stop_time)) for row in seizures.itertuples(index=False)]


def compute_intervals(duration: float,
                      seizures: List[Interval],
                      cfg: Config) -> Dict[str, List[Interval]]:
    """
    √Ä partir de la dur√©e d'un enregistrement et des intervalles de crises,
    calcule les intervalles :
      - 'preictal': intervalle pr√©-ictal juste AVANT la 1√®re crise
      - 'interictal': intervalle inter-ictal APR√àS la DERNI√àRE crise + tampon
    Tout le reste est ignor√© (fen√™tres exclues).
    """
    if not seizures:
        # Pas de crise : tout est inter-ictal
        return {
            "preictal": [],
            "interictal": [(0.0, duration)],
        }

    seizures = sorted(seizures, key=lambda x: x[0])
    first_onset, first_offset = seizures[0]
    last_onset, last_offset = seizures[-1]

    preictal: List[Interval] = []
    interictal: List[Interval] = []

    # ----- Pr√©ictal : juste avant la 1√®re crise -----
    if first_onset >= cfg.min_preictal:
        pre_start = max(0.0, first_onset - cfg.preictal_dur)
        pre_end = first_onset
        preictal.append((pre_start, pre_end))
        preictal = merge_intervals(preictal)
    else:
        preictal = []

    # ----- Interictal : APR√àS la derni√®re crise + tampon -----
    post_start = min(duration, last_offset + cfg.inter_tampon)
    if post_start < duration:
        interictal.append((post_start, duration))
        interictal = merge_intervals(interictal)
    else:
        interictal = []

    return {
        "preictal": preictal,
        "interictal": interictal,
    }


# ======================
#   SPECTROGRAMMES
# ======================

def make_spectrogram(window_data: np.ndarray, sfreq: float, cfg: Config) -> np.ndarray:
    """
    Transforme [C, samples] en spectrogramme [C, F, T].
    """
    window_data = window_data.astype(np.float32)

    f, t, Zxx = stft(window_data,
                     fs=sfreq,
                     nperseg=cfg.nperseg,
                     noverlap=cfg.noverlap,
                     axis=-1,
                     boundary=None)
    # Garder la bande de fr√©quences d√©sir√©e
    band = (f >= cfg.fmin_spec) & (f <= cfg.fmax_spec)
    Zxx = Zxx[:, band, :]
    # Puissance
    Sxx = np.abs(Zxx) ** 2
    # Log
    Sxx = np.log10(Sxx + 1e-10)
    # Normalisation par canal
    C, F, T = Sxx.shape
    Sxx_norm = np.empty_like(Sxx, dtype=np.float32)
    for c in range(C):
        chan = Sxx[c]
        mean = chan.mean()
        std = chan.std()
        if std < 1e-6:
            Sxx_norm[c] = (chan - mean).astype(np.float32)
        else:
            Sxx_norm[c] = ((chan - mean) / std).astype(np.float32)
    return Sxx_norm


# ======================
#   TRAITEMENT D'UN EDF
# ======================

def process_one_recording(edf_path: Path, csv_bi_path: Path, cfg: Config,
                          windows_dir: Path,
                          sequences_dir: Optional[Path],
                          windows_rows: List[Dict[str, Any]],
                          sequences_rows: List[Dict[str, Any]]):
    """
    Traite un seul fichier EDF :
     - lit le signal
     - calcule les intervalles pr√©-/inter-ictaux
     - d√©coupe en fen√™tres
     - transforme en spectrogrammes
     - sauvegarde les .npy + met √† jour les CSV index (windows & s√©quences).
    """
    print(f"==> Traitement de {edf_path}")

    patient, session = get_patient_session_from_path(edf_path)
    rec_name = edf_path.stem  # ex: xxx_s003_t000

    # --- 1) Crises depuis csv_bi ---
    try:
        seizures = read_seizure_intervals(csv_bi_path)
    except Exception as e:
        print(f"   [ERREUR] lecture csv_bi {csv_bi_path}: {e}")
        return

    # --- 2) Lire le signal EDF ---
    try:
        raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
    except Exception as e:
        print(f"   [ERREUR] lecture EDF {edf_path}: {e}")
        return

    # Garder seulement EEG
    raw.pick_types(eeg=True)

    # Montage MNE d√©sactiv√© (TCP_AR n'existe pas en standard)
    if cfg.montage is not None:
        try:
            montage = mne.channels.make_standard_montage(cfg.montage)
            raw.set_montage(montage, on_missing='ignore')
        except Exception as e:
            print(f"   [WARN] montage non appliqu√© ({e})")

    # Filtre bande
    raw.filter(cfg.l_freq, cfg.h_freq, fir_design="firwin", verbose=False)

    # Notch 50/60 Hz si demand√©
    if cfg.notch_freq is not None:
        freqs = np.array([cfg.notch_freq, cfg.notch_freq * 2])
        raw.notch_filter(freqs=freqs, verbose=False)

    # Resample
    if cfg.sfreq_target is not None:
        raw.resample(cfg.sfreq_target, verbose=False)
    sfreq = raw.info["sfreq"]
    duration = raw.n_times / sfreq

    # --- 3) Intervalles pr√©-/inter-ictaux ---
    intervals = compute_intervals(duration, seizures, cfg)
    preictal_intervals = intervals["preictal"]
    interictal_intervals = intervals["interictal"]

    # --- 4) Fen√™trage ---
    win_samp = int(round(cfg.win_size * sfreq))
    hop_samp = int(round(cfg.hop_size * sfreq))

    local_windows_specs: List[np.ndarray] = []
    local_windows_meta: List[Dict[str, Any]] = []

    win_idx = 0
    start_samp = 0
    while start_samp + win_samp <= raw.n_times:
        end_samp = start_samp + win_samp
        t_start = start_samp / sfreq
        t_end = end_samp / sfreq

        # Label en fonction des intervalles
        if is_inside_intervals(t_start, t_end, preictal_intervals):
            label = 1
        elif is_inside_intervals(t_start, t_end, interictal_intervals):
            label = 0
        else:
            start_samp += hop_samp
            continue

        # Extraire les donn√©es EEG : [C, samples]
        data = raw.get_data(start=start_samp, stop=end_samp)
        data = (data * 1e6).astype(np.float32)

        # Spectrogramme [C, F, T]
        spec = make_spectrogram(data, sfreq, cfg)

        # Sauvegarde .npy pour cette fen√™tre
        out_name = f"{rec_name}_win{win_idx:04d}.npy"
        out_path = windows_dir / out_name
        np.save(out_path, spec)

        rel_path = out_path.relative_to(cfg.prepared_root)

        meta = {
            "path": str(rel_path).replace("\\", "/"),
            "label": int(label),
            "patient": patient,
            "session": session,
            "recording": rec_name,
            "t_start": t_start,
            "t_end": t_end,
        }
        windows_rows.append(meta)
        local_windows_specs.append(spec)
        local_windows_meta.append(meta)

        win_idx += 1
        start_samp += hop_samp

    print(
        f"   -> {len(local_windows_meta)} fen√™tres gard√©es "
        f"(preictal={sum(m['label']==1 for m in local_windows_meta)}, "
        f"interictal={sum(m['label']==0 for m in local_windows_meta)})"
    )

    # --- 5) Optionnel : s√©quences de N fen√™tres ---
    if cfg.make_sequences and sequences_dir is not None and len(local_windows_specs) >= cfg.seq_len:
        num_seq = 0
        for i in range(len(local_windows_specs) - cfg.seq_len + 1):
            seq_specs = local_windows_specs[i:i + cfg.seq_len]
            seq_meta = local_windows_meta[i:i + cfg.seq_len]

            X_seq = np.stack(seq_specs, axis=0).astype(np.float32)  # [N, C, F, T]
            label_seq = seq_meta[-1]["label"]
            last_center_s = 0.5 * (seq_meta[-1]["t_start"] + seq_meta[-1]["t_end"])

            seq_name = f"{rec_name}_seq{num_seq:04d}.npy"
            seq_path = sequences_dir / seq_name

            try:
                np.save(seq_path, X_seq)
            except OSError as e:
                print(f"   [ERREUR] impossible de sauvegarder la s√©quence {seq_name}: {e}")
                continue

            rel_seq_path = seq_path.relative_to(cfg.prepared_root)

            sequences_rows.append({
                "path": str(rel_seq_path).replace("\\", "/"),
                "label": int(label_seq),
                "patient": patient,
                "session": session,
                "recording": rec_name,
                "last_win_center_s": last_center_s,
            })
            num_seq += 1

        print(f"   -> {num_seq} s√©quences de {cfg.seq_len} fen√™tres cr√©√©es.")


# ======================
#   FONCTION D'√âQUILIBRAGE
# ======================

def make_balanced_index(windows_rows: List[Dict[str, Any]],
                        sequences_rows: List[Dict[str, Any]],
                        prepared_split_dir: Path):
    """
    Cr√©e des index √©quilibr√©s preictal/interictal pour les fen√™tres et, si possible, pour les s√©quences.
    On sous-√©chantillonne l'interictal pour avoir ~autant de 0 que de 1.
    """
    if not windows_rows:
        print("\n[INFO] Aucun fen√™tre, index √©quilibr√© non cr√©√©.")
        return

    df_win = pd.DataFrame(windows_rows)
    n_pre = (df_win["label"] == 1).sum()
    n_inter = (df_win["label"] == 0).sum()
    print(f"\n[STATS] Fen√™tres totales: {len(df_win)} (preictal={n_pre}, interictal={n_inter})")

    if n_pre == 0 or n_inter == 0:
        print("[WARN] Impossible d'√©quilibrer (une des classes est vide).")
        balanced_win = df_win
    else:
        n_target = min(n_pre, n_inter)
        pre_df = df_win[df_win["label"] == 1]
        inter_df = df_win[df_win["label"] == 0]

        pre_sample = pre_df.sample(n=n_target, random_state=42) if n_pre > n_target else pre_df
        inter_sample = inter_df.sample(n=n_target, random_state=42) if n_inter > n_target else inter_df

        balanced_win = pd.concat([pre_sample, inter_sample], axis=0).sample(frac=1.0, random_state=42)
        print(f"[OK] Index fen√™tres √©quilibr√©: {len(balanced_win)} lignes (‚âà {n_target} + {n_target}).")

    balanced_win_path = prepared_split_dir / "train_windows_index_balanced.csv"
    balanced_win.to_csv(balanced_win_path, index=False)
    print(f"Index fen√™tres √©quilibr√© sauvegard√© dans {balanced_win_path}")

    # M√™me logique possible pour les s√©quences (si elles existent)
    if sequences_rows:
        df_seq = pd.DataFrame(sequences_rows)
        n_pre_s = (df_seq["label"] == 1).sum()
        n_inter_s = (df_seq["label"] == 0).sum()
        print(f"\n[STATS] S√©quences totales: {len(df_seq)} (preictal={n_pre_s}, interictal={n_inter_s})")

        if n_pre_s == 0 or n_inter_s == 0:
            print("[WARN] Impossible d'√©quilibrer les s√©quences (une des classes est vide).")
            balanced_seq = df_seq
        else:
            n_target_s = min(n_pre_s, n_inter_s)
            pre_seq = df_seq[df_seq["label"] == 1]
            inter_seq = df_seq[df_seq["label"] == 0]

            pre_seq_sample = pre_seq.sample(n=n_target_s, random_state=42) if n_pre_s > n_target_s else pre_seq
            inter_seq_sample = inter_seq.sample(n=n_target_s, random_state=42) if n_inter_s > n_target_s else inter_seq

            balanced_seq = pd.concat([pre_seq_sample, inter_seq_sample], axis=0).sample(frac=1.0, random_state=42)
            print(f"[OK] Index s√©quences √©quilibr√©: {len(balanced_seq)} lignes (‚âà {n_target_s} + {n_target_s}).")

        balanced_seq_path = prepared_split_dir / "train_sequences_index_balanced.csv"
        balanced_seq.to_csv(balanced_seq_path, index=False)
        print(f"Index s√©quences √©quilibr√© sauvegard√© dans {balanced_seq_path}")


# ======================
#        MAIN
# ======================

def main():
    # ‚ö†Ô∏è ADAPTER CES CHEMINS √Ä TA NOUVELLE DATASET R√âDUITE
    cfg = Config(
        raw_root=Path(r"edf_reduit/"),  # <-- racine de la dataset r√©duite
        prepared_root=Path(r"prepared/"),
        split="all",               # juste un nom (tu peux mettre "all" au lieu de "train")
        make_sequences=True,
        seq_len=10,
        patients_whitelist=None,
        sessions_whitelist=None,
    )

    # üîπ ICI : contrairement √† avant, on NE met plus /cfg.split pour lire les EDF
    raw_dir = cfg.raw_root                       # la petite dataset unique
    split_out_dir = cfg.prepared_root / cfg.split
    windows_dir = split_out_dir / "windows"
    sequences_dir = split_out_dir / "sequences" if cfg.make_sequences else None

    windows_dir.mkdir(parents=True, exist_ok=True)
    if sequences_dir is not None:
        sequences_dir.mkdir(parents=True, exist_ok=True)

    windows_rows: List[Dict[str, Any]] = []
    sequences_rows: List[Dict[str, Any]] = []

    # Parcourir tous les .edf de la dataset r√©duite
    edf_files = sorted(raw_dir.rglob("*.edf"))

    for edf_path in edf_files:
        # Filtrer : ne garder que les enregistrements 01_tcp_ar + whitelist
        if not is_01_tcp_ar_recording(edf_path, cfg):
            continue

        # Fichier d'annotations globales : m√™me base que l'EDF
        csv_bi_path = edf_path.with_suffix(".csv_bi")
        if not csv_bi_path.exists():
            csv_bi_path = edf_path.with_suffix(".csv")
        if not csv_bi_path.exists():
            print(f"[WARN] csv_bi introuvable pour {edf_path}")
            continue

        process_one_recording(
            edf_path=edf_path,
            csv_bi_path=csv_bi_path,
            cfg=cfg,
            windows_dir=windows_dir,
            sequences_dir=sequences_dir,
            windows_rows=windows_rows,
            sequences_rows=sequences_rows,
        )

    # Index brut (non √©quilibr√©)
    windows_index_path = split_out_dir / "windows_index_all.csv"
    pd.DataFrame(windows_rows).to_csv(windows_index_path, index=False)
    print(f"\nIndex fen√™tres (brut) sauvegard√© dans {windows_index_path} ({len(windows_rows)} lignes)")

    if cfg.make_sequences and sequences_rows:
        seq_index_path = split_out_dir / "sequences_index_all.csv"
        pd.DataFrame(sequences_rows).to_csv(seq_index_path, index=False)
        print(f"Index s√©quences (brut) sauvegard√© dans {seq_index_path} ({len(sequences_rows)} lignes)")

    # Cr√©ation des index √©quilibr√©s
    make_balanced_index(windows_rows, sequences_rows, split_out_dir)


if __name__ == "__main__":
    main()


==> Traitement de edf_reduit\aaaaaarq\s017_2014\01_tcp_ar\aaaaaarq_s017_t005.edf
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
   -> 491 fen√™tres gard√©es (preictal=58, interictal=433)
   -> 482 s√©quences de 10 fen√™tres cr√©√©es.
==> Traitement de edf_reduit\aaaaaarq\s018_2014\01_tcp_ar\aaaaaarq_s018_t002.edf
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
   -> 149 fen√™tres gard√©es (preictal=0, interictal=149)
   -> 140 s√©quences de 10 fen√™tres cr√©√©es.
==> Traitement de edf_reduit\aaaaaasy\s003_2003\01_tcp_ar\aaaaaasy_s003_t000.edf
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
   -> 33 fen√™tres gard√©es (preictal=0, interictal=33)
   -> 24 s√©quences de 10 fen√™tres cr√©√©es.
==> Traitement de edf_reduit\aaaaaasy\s003_2003\01_tcp_ar\aaaaaasy_s003_t004.edf
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
   -> 66 fen√™tres gard√©es (preictal=0, interictal=66)