In [1]:
import os
import glob
import time
import random
import gc
import numpy as np
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from scipy.interpolate import interp1d
from scipy.integrate import trapezoid

# ==========================================
# [0] Experiment settings
# ==========================================
# Directory that is globbed for per-patient files matching "p*.mat".
MAT_DIR = "/content/drive/MyDrive/Colab Notebooks/PulseDB"  # directory containing p*.mat
# Max number of segments read per patient file; None (or 0) means "read all".
SEGMENT_LIMIT = None
# Length every waveform is resampled to before it is fed to the CNN.
PAD_LEN = 200
# Duration assigned to one segment when converting minutes -> segment counts.
SEC_PER_SEGMENT = 10.0

# Optimisation hyper-parameters used by train_one_model / the DataLoaders.
BATCH_SIZE = 32
EPOCHS = 100
LR = 1e-3
WEIGHT_DECAY = 1e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed applied by set_seed(); the cudnn flags below are set once at import time
# and request deterministic (non-autotuned) cuDNN kernels.
SEED = 42
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

print(f"Using Device: {DEVICE}")

# Phase-3 protocol config
# All *_MIN values are minutes; they are converted with segs_from_minutes().
N_FOLDS = 5
BLOCK_GAP_MIN = 0                 # fixed
TIME_GAP_MIN = 5                  # fixed (main protocol candidate)
TEST_DUR_MIN = 5                  # fixed
TRAIN_DUR_SWEEP_MIN = [3, 5, 10, 15]
# Fraction of each training window (taken from its end) held out for validation.
VAL_FRAC_IN_TRAIN = 0.20

# The whole patient sweep is run once per value (without / with beat filtering).
FILTERING_SWEEP = [False, True]   # NEW

# ==========================================
# [1] Re-sample + Prior feature
# ==========================================
def preprocess_ensemble_by_rpeaks(ppg_raw, rpeaks_raw, sbp, dbp, target_len=125, threshold_corr=0.7):
    """
    Build a normalized ensemble-average beat from one PPG segment.

    Filtering steps:
      1) BP range filter (SBP in [50, 250], DBP in [30, 160])
      2) split beats by ECG R-peaks
      3) resample each beat to target_len
      4) ensemble average + consistency check via correlation

    Args:
        ppg_raw: array-like PPG samples of one segment (any shape; flattened).
        rpeaks_raw: array-like R-peak sample indices (any shape; flattened,
            truncated to int and sorted).
        sbp, dbp: segment blood-pressure labels used for the range filter.
        target_len: number of samples each beat is resampled to.
        threshold_corr: minimum correlation with the ensemble for a beat to
            count as "consistent".

    Returns:
        float32 array of shape (target_len,) scaled to 0~1, or None when the
        segment is rejected (BP out of range, fewer than 5 usable beats, flat
        ensemble, or fewer than 70% consistent beats).
    """
    if not (50 <= sbp <= 250) or not (30 <= dbp <= 160):
        return None

    # ravel() instead of squeeze(): a single-element input stays 1-D. With
    # squeeze() one R-peak became a 0-d array and np.sort() raised.
    ppg = np.asarray(ppg_raw).ravel()
    rpeaks = np.sort(np.asarray(rpeaks_raw).ravel().astype(int))

    x_new = np.linspace(0, 1, target_len)  # loop-invariant resampling grid
    beats = []
    for start, end in zip(rpeaks[:-1], rpeaks[1:]):
        if start < 0 or end > len(ppg):
            continue
        beat_segment = ppg[start:end]
        if len(beat_segment) < 20:
            continue

        x_old = np.linspace(0, 1, len(beat_segment))
        f_interp = interp1d(x_old, beat_segment, kind="linear", fill_value="extrapolate")
        beats.append(f_interp(x_new))

    if len(beats) < 5:
        return None

    beats = np.array(beats)
    ensemble_avg = np.mean(beats, axis=0)

    e_min, e_max = ensemble_avg.min(), ensemble_avg.max()
    span = e_max - e_min
    if not (span > 1e-6):
        # A flat (or NaN) ensemble cannot be scaled to 0~1, so reject it
        # instead of returning an un-normalized waveform.
        return None
    ensemble_avg = (ensemble_avg - e_min) / span

    # NaN correlations (e.g. from a constant beat) compare False and therefore
    # count as inconsistent.
    correlations = [np.corrcoef(ensemble_avg, b)[0, 1] for b in beats]
    consistent_beats_count = sum(1 for c in correlations if c >= threshold_corr)

    if (consistent_beats_count / len(beats)) < 0.7:
        return None

    return ensemble_avg.astype(np.float32)

def cubic_resample(ppg, target_len=PAD_LEN):
    """
    Resample a 1-D waveform to `target_len` samples on a normalized [0, 1] axis.

    Cubic interpolation is used when at least 4 samples are available; shorter
    inputs, or any failure of the cubic fit, fall back to linear interpolation.
    The result is always float32.
    """
    n_src = len(ppg)
    src_grid = np.linspace(0, 1, n_src)
    dst_grid = np.linspace(0, 1, target_len)

    def _linear():
        # Shared fallback: piecewise-linear resampling.
        return np.interp(dst_grid, src_grid, ppg).astype(np.float32)

    if n_src < 4:
        return _linear()

    try:
        spline = interp1d(src_grid, ppg, kind="cubic", bounds_error=False, fill_value="extrapolate")
        return spline(dst_grid).astype(np.float32)
    except Exception:
        return _linear()

def extract_multiscale_morph_features(ppg_01):
    """
    Compute 11 morphology descriptors of a waveform at 4 resampling scales.

    For each scale in (100, 150, 200, 250) the waveform is resampled with
    cubic_resample and the following values are appended, in this order:
    peak value, last-sample value, their difference, mean, max first
    difference, position of that max (index / scale), trapezoid area of the
    samples before the peak, standard deviation, peak position
    (index / scale), skewness and (non-excess) kurtosis approximations.

    Returns:
        float32 array with 4 * 11 = 44 entries.
    """
    feature_vec = []
    for n_pts in (100, 150, 200, 250):
        wave = cubic_resample(ppg_01, n_pts)

        i_peak = int(np.argmax(wave))
        peak_val = float(wave[i_peak])
        tail_val = float(wave[n_pts - 1])
        mean_val = float(np.mean(wave))
        sigma = float(np.std(wave))

        # Steepest sample-to-sample rise and where it happens.
        slopes = np.diff(wave)
        if len(slopes) > 0:
            max_slope = float(np.max(slopes))
            t_max_slope = int(np.argmax(slopes)) / n_pts
        else:
            max_slope, t_max_slope = 0.0, 0.0

        # Area under the rising part (samples strictly before the peak).
        if i_peak > 0:
            area_to_peak = float(trapezoid(wave[:i_peak]))
        else:
            area_to_peak = 0.0

        # Standardized 3rd / 4th central moments; zero for a constant waveform.
        deviation = wave - mean_val
        if sigma > 0:
            skew = float(np.mean(deviation**3) / (sigma**3))
            kurt = float(np.mean(deviation**4) / (sigma**4))
        else:
            skew, kurt = 0.0, 0.0

        feature_vec += [
            peak_val,
            tail_val,
            peak_val - tail_val,
            mean_val,
            max_slope,
            t_max_slope,
            area_to_peak,
            sigma,
            i_peak / n_pts,
            skew,
            kurt,
        ]

    return np.array(feature_vec, dtype=np.float32)

# ==========================================
# [2] Load data (filtering toggle)
# ==========================================
def load_data_from_mat(mat_path, segment_limit=None, filtering=False):
    """
    Load PPG segments, morphology priors and BP labels from one patient file.

    The file is opened with h5py and must contain a "Subj_Wins" group with the
    reference datasets "PPG_F", "SegSBP" and "SegDBP" ("ECG_RPeaks" is needed
    only when filtering=True).

    Args:
        mat_path: path of the .mat file.
        segment_limit: read at most this many segments; None or 0 reads all.
        filtering: when True, each segment is replaced by the ensemble beat
            from preprocess_ensemble_by_rpeaks and rejected segments are
            dropped; when False, PPG_F is used as stored.

    Returns:
        (segments, priors, targets): a list of 1-D float32 waveforms, a
        float32 array of stacked prior features, and a float32 array of
        [sbp, dbp] rows, all aligned by index.

    Raises:
        ValueError: if no segment is kept (previously surfaced as an opaque
            "need at least one array to stack" error from np.stack).
    """
    segments, priors, targets = [], [], []
    skip_bp, skip_noise = 0, 0

    with h5py.File(mat_path, "r") as f:
        sw = f["Subj_Wins"]
        ppg_refs = sw["PPG_F"][0]
        sbp_refs = sw["SegSBP"][0]
        dbp_refs = sw["SegDBP"][0]

        ecg_refs = sw["ECG_RPeaks"][0] if "ECG_RPeaks" in sw.keys() else None

        total = min(len(ppg_refs), segment_limit) if segment_limit else len(ppg_refs)

        for i in range(total):
            sbp = float(f[sbp_refs[i]][()][0][0])
            dbp = float(f[dbp_refs[i]][()][0][0])

            if filtering:
                if ecg_refs is None:
                    # cannot run filtering without ECG_RPeaks
                    skip_noise += 1
                    continue

                # Same BP range that preprocess_ensemble_by_rpeaks enforces;
                # checking it first avoids reading waveforms that would be
                # rejected anyway and keeps the two skip counters exact.
                if not (50 <= sbp <= 250) or not (30 <= dbp <= 160):
                    skip_bp += 1
                    continue

                ppg_raw = f[ppg_refs[i]][()]
                rpeaks_raw = f[ecg_refs[i]][()]
                ppg = preprocess_ensemble_by_rpeaks(ppg_raw, rpeaks_raw, sbp, dbp)
                if ppg is None:
                    skip_noise += 1
                    continue
            else:
                # baseline: use PPG_F as-is (already 0~1 per segment)
                ppg = f[ppg_refs[i]][()].squeeze().astype(np.float32)

            segments.append(ppg)
            priors.append(extract_multiscale_morph_features(ppg))
            targets.append([sbp, dbp])

    if filtering:
        print(f"✅ filtering=True | kept: {len(segments)} / {total}")
        print(f"❌ excluded: (bp_range: {skip_bp}, noise/other: {skip_noise})")
    else:
        print(f"✅ filtering=False | kept: {len(segments)} / {total}")

    if not segments:
        # np.stack([]) would raise a ValueError with an unhelpful message;
        # keep the exception type but say what actually happened.
        raise ValueError(f"No usable segments in {mat_path} (filtering={filtering})")

    return segments, np.stack(priors).astype(np.float32), np.array(targets, dtype=np.float32)

# ==========================================
# [3] Dataset
# ==========================================
class PPGDatasetRawY(Dataset):
    """
    Index-aligned dataset of (waveform, prior features, raw BP label).

    Labels are kept unscaled; each item is returned as
    (x, p, y) where x is the waveform resampled to PAD_LEN with a leading
    channel axis, p the prior-feature vector and y the stored target row.
    """

    def __init__(self, segments, priors, targets_mmHg):
        self.segments, self.priors, self.targets = segments, priors, targets_mmHg

    def __len__(self):
        return len(self.segments)

    def __getitem__(self, idx):
        # Resample lazily so segments of different lengths share one shape.
        resampled = cubic_resample(self.segments[idx], PAD_LEN)
        waveform = torch.tensor(resampled, dtype=torch.float32).unsqueeze(0)
        prior_vec = torch.tensor(self.priors[idx], dtype=torch.float32)
        label = torch.tensor(self.targets[idx], dtype=torch.float32)
        return waveform, prior_vec, label

# ==========================================
# [4] Model
# ==========================================
class MorphCNNRegressor(nn.Module):
    """
    Two-branch regressor: a 1-D CNN over the waveform plus an MLP over the
    prior-feature vector, concatenated and mapped to 2 outputs.

    Layers are created in the same order and at the same Sequential indices
    as a hand-written layer list, so parameter names stay `cnn.N.*`,
    `fc_prior.N.*` and `fc_out.N.*`.
    """

    def __init__(self, prior_dim=44):
        super().__init__()

        def conv_stage(c_in, c_out, kernel, pool):
            # Conv -> BatchNorm -> ReLU -> pooling, with length-preserving padding.
            return [
                nn.Conv1d(c_in, c_out, kernel, padding=kernel // 2),
                nn.BatchNorm1d(c_out),
                nn.ReLU(),
                pool,
            ]

        def dense_stage(n_in, n_out, drop=None):
            # Linear -> BatchNorm -> ReLU, optionally followed by Dropout.
            layers = [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU()]
            if drop is not None:
                layers.append(nn.Dropout(drop))
            return layers

        cnn_layers = []
        for c_in, c_out, kernel in ((1, 32, 7), (32, 64, 5), (64, 128, 5)):
            cnn_layers += conv_stage(c_in, c_out, kernel, nn.MaxPool1d(2))
        # The last stage collapses the time axis to a single value per channel.
        cnn_layers += conv_stage(128, 256, 3, nn.AdaptiveAvgPool1d(1))
        self.cnn = nn.Sequential(*cnn_layers)

        self.fc_prior = nn.Sequential(
            *dense_stage(prior_dim, 128, drop=0.3),
            *dense_stage(128, 256),
        )

        self.fc_out = nn.Sequential(
            *dense_stage(256 + 256, 256, drop=0.3),
            *dense_stage(256, 128, drop=0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
        )

    def forward(self, x, prior):
        wave_feat = self.cnn(x).squeeze(-1)
        prior_feat = self.fc_prior(prior)
        fused = torch.cat([wave_feat, prior_feat], dim=1)
        return self.fc_out(fused)

# ==========================================
# [5] Train-only label scaler
# ==========================================
class LabelScaler2D:
    """
    Per-column label scaler fitted on training labels only.

    mode="minmax": (y - min) / (max - min + eps)
    mode="zscore": (y - mean) / (std + eps)
    `inverse` undoes `transform` with the same statistics.
    """

    def __init__(self, mode="minmax", eps=1e-6):
        assert mode in ["minmax", "zscore"]
        self.mode, self.eps, self.fitted = mode, eps, False

    def fit(self, y_train_mmHg: np.ndarray):
        """Store column-wise statistics of the training labels; returns self."""
        labels = np.asarray(y_train_mmHg, dtype=np.float32)
        if self.mode == "minmax":
            self.y_min, self.y_max = labels.min(axis=0), labels.max(axis=0)
        else:
            self.y_mean, self.y_std = labels.mean(axis=0), labels.std(axis=0)
        self.fitted = True
        return self

    def _offset_and_scale(self, like: torch.Tensor):
        # Materialize the fitted statistics on the device/dtype of `like`.
        def as_tensor(stat):
            return torch.tensor(stat, device=like.device, dtype=like.dtype)

        if self.mode == "minmax":
            lo, hi = as_tensor(self.y_min), as_tensor(self.y_max)
            return lo, hi - lo + self.eps
        return as_tensor(self.y_mean), as_tensor(self.y_std) + self.eps

    def transform(self, y_mmHg: torch.Tensor) -> torch.Tensor:
        """Map raw labels to the scaled space."""
        assert self.fitted
        offset, scale = self._offset_and_scale(y_mmHg)
        return (y_mmHg - offset) / scale

    def inverse(self, y_scaled: torch.Tensor) -> torch.Tensor:
        """Map scaled values back to the raw label space."""
        assert self.fitted
        offset, scale = self._offset_and_scale(y_scaled)
        return y_scaled * scale + offset

# ==========================================
# [6] Train / Eval
# ==========================================
def set_seed(seed=SEED):
    """Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices)."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

def train_one_model(train_loader, val_loader, scaler: LabelScaler2D):
    """
    Train a fresh MorphCNNRegressor and return it with its best-validation weights.

    Labels from the loaders are raw; they are scaled with `scaler` before the
    MSE loss is computed, so the model predicts in the scaled label space.

    Args:
        train_loader: yields (x, prior, y_raw) batches for optimisation.
        val_loader: yields (x, prior, y_raw) batches for model selection.
        scaler: fitted LabelScaler2D used to scale the labels.

    Returns:
        The model, on DEVICE, loaded with the weights of the epoch with the
        lowest mean validation loss. If the validation loader yields nothing
        the loss is treated as inf, no snapshot is taken, and the last-epoch
        weights are returned.
    """
    model = MorphCNNRegressor(prior_dim=44).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    criterion = nn.MSELoss()

    best_val = float("inf")
    best_state = None

    for _epoch in range(1, EPOCHS + 1):
        model.train()
        for x, p, y_mmHg in train_loader:
            if x.size(0) < 2:
                # The BatchNorm1d layers in the fully connected heads cannot
                # compute batch statistics from one sample in training mode
                # and would raise; skip a trailing singleton batch instead.
                continue
            x, p, y_mmHg = x.to(DEVICE), p.to(DEVICE), y_mmHg.to(DEVICE)
            y = scaler.transform(y_mmHg)
            pred = model(x, p)
            loss = criterion(pred, y)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

        model.eval()
        val_losses = []
        with torch.no_grad():
            for x, p, y_mmHg in val_loader:
                x, p, y_mmHg = x.to(DEVICE), p.to(DEVICE), y_mmHg.to(DEVICE)
                y = scaler.transform(y_mmHg)
                pred = model(x, p)
                val_losses.append(float(criterion(pred, y).item()))
        avg_val = float(np.mean(val_losses)) if val_losses else float("inf")

        if avg_val < best_val:
            best_val = avg_val
            # Snapshot on CPU so the copy does not occupy device memory.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def eval_mae_sd_mmHg(model, loader, scaler: LabelScaler2D):
    """
    Evaluate a model on `loader` in the raw label space.

    Predictions are mapped back with `scaler.inverse` and compared with the
    raw labels. Returns a dict with the mean absolute error and the
    population standard deviation (ddof=0) of the signed error for column 0
    ("sbp") and column 1 ("dbp"), plus the sample count "n". An empty loader
    yields NaN metrics and n=0.
    """
    model.eval()
    batch_errors = []
    with torch.no_grad():
        for wave, prior, target in loader:
            wave = wave.to(DEVICE)
            prior = prior.to(DEVICE)
            target = target.to(DEVICE)
            residual = scaler.inverse(model(wave, prior)) - target
            batch_errors.append(residual.detach().cpu().numpy())

    if not batch_errors:
        return {
            "mae_sbp": np.nan,
            "sd_sbp": np.nan,
            "mae_dbp": np.nan,
            "sd_dbp": np.nan,
            "n": 0,
        }

    all_err = np.concatenate(batch_errors, axis=0)
    sbp_err = all_err[:, 0]
    dbp_err = all_err[:, 1]

    metrics = {}
    metrics["mae_sbp"] = float(np.mean(np.abs(sbp_err)))
    metrics["sd_sbp"] = float(np.std(sbp_err, ddof=0))
    metrics["mae_dbp"] = float(np.mean(np.abs(dbp_err)))
    metrics["sd_dbp"] = float(np.std(dbp_err, ddof=0))
    metrics["n"] = int(all_err.shape[0])
    return metrics

# ==========================================
# [7] Phase-3 Engine (multi-patient + filtering sweep)
# ==========================================
def segs_from_minutes(minutes: float) -> int:
    """Convert a duration in minutes to a whole number of segments (truncated)."""
    duration_s = minutes * 60.0
    return int(duration_s / SEC_PER_SEGMENT)

def min_required_total_len_for_any_traindur():
    """
    Conservative min segments to ensure ALL TrainDur in sweep can run for all folds.

    Folds are laid out as N_FOLDS blocks of
      fold_len = floor((total_len - (N_FOLDS - 1) * block_gap) / N_FOLDS)
    and each fold needs
      fold_len >= max_train + gap + test
    which gives
      total_len >= N_FOLDS * (max_train + gap + test) + (N_FOLDS - 1) * block_gap

    The block-gap term is zero when BLOCK_GAP_MIN == 0, so the result is
    unchanged for that configuration.

    Returns:
        Minimum number of segments (int).
    """
    max_tr_segs = segs_from_minutes(max(TRAIN_DUR_SWEEP_MIN))
    gap_segs = segs_from_minutes(TIME_GAP_MIN)
    test_segs = segs_from_minutes(TEST_DUR_MIN)
    block_gap_segs = segs_from_minutes(BLOCK_GAP_MIN)

    need_per_fold = max_tr_segs + gap_segs + test_segs
    return N_FOLDS * need_per_fold + (N_FOLDS - 1) * block_gap_segs

def run_phase3_one_patient(mat_path: str, filtering: bool):
    """
    Run the blocked train/gap/test protocol for one patient file.

    The kept segments are split, in stored order, into N_FOLDS contiguous
    folds. Inside every fold the layout (by segment index) is

        [ ... unused ... | train window | time gap | test window ]

    with the test window at the end of the fold. For each training duration
    in TRAIN_DUR_SWEEP_MIN a fresh model is trained per fold and evaluated on
    that fold's test window.

    Args:
        mat_path: path of the patient .mat file.
        filtering: forwarded to load_data_from_mat.

    Returns:
        dict mapping each training duration (minutes) to a dict with
        "valid_folds" and the fold-averaged "mae_sbp", "sd_sbp", "mae_dbp",
        "sd_dbp" (NaN when no fold was usable).

    Raises:
        ValueError: when the patient has too few segments for the protocol.
    """
    # Re-seed per patient so each patient's run starts from the same RNG state.
    set_seed(SEED)

    print("\n" + "=" * 80)
    print(f"[PATIENT] {os.path.basename(mat_path)} | filtering={filtering}")
    print("=" * 80)

    segments, priors, targets_mmHg = load_data_from_mat(mat_path, segment_limit=SEGMENT_LIMIT, filtering=filtering)
    ds = PPGDatasetRawY(segments, priors, targets_mmHg)

    total_len = len(ds)
    print(f"[Data Ready] total_len={total_len}")

    # Reject the patient up front if the longest training duration cannot fit.
    min_need = min_required_total_len_for_any_traindur()
    if total_len < min_need:
        raise ValueError(f"Insufficient segments: total_len={total_len} < min_required={min_need}")

    b_gap_segs = segs_from_minutes(BLOCK_GAP_MIN)
    gap_segs   = segs_from_minutes(TIME_GAP_MIN)
    test_segs  = segs_from_minutes(TEST_DUR_MIN)

    # Segments left for the folds once the gaps between folds are removed.
    available_len = total_len - (N_FOLDS - 1) * b_gap_segs
    if available_len <= 0:
        raise ValueError("Not enough segments for the requested N_FOLDS and BLOCK_GAP_MIN.")

    fold_len = available_len // N_FOLDS
    if fold_len <= 0:
        raise ValueError("Computed fold_len <= 0.")
    if fold_len <= test_segs + 1:
        raise ValueError("Fold too short for test duration.")

    print("\n=== PHASE 3 CONFIG ===")
    print(f"N_FOLDS={N_FOLDS} | BlockGap={BLOCK_GAP_MIN}min ({b_gap_segs} segs)")
    print(f"FoldLen={fold_len} segs (~{fold_len*SEC_PER_SEGMENT/60.0:.2f} min)")
    print(f"TimeGap={TIME_GAP_MIN}min ({gap_segs} segs) | TestDur={TEST_DUR_MIN}min ({test_segs} segs)")
    print(f"TrainDur sweep (min): {TRAIN_DUR_SWEEP_MIN}")
    print(f"ValFracInTrain={VAL_FRAC_IN_TRAIN:.2f}")

    patient_summary = {}  # tr_min -> {valid_folds, mae_sbp, sd_sbp, mae_dbp, sd_dbp}
    t_global0 = time.time()

    for tr_min in TRAIN_DUR_SWEEP_MIN:
        tr_segs = segs_from_minutes(tr_min)

        print(f"\n===============================")
        print(f" TrainDur = {tr_min} min")
        print(f"===============================")

        fold_stats = []
        t_tr0 = time.time()

        for f_idx in range(N_FOLDS):
            fold_start = f_idx * (fold_len + b_gap_segs)
            fold_end   = fold_start + fold_len

            # Windows are placed backwards from the end of the fold:
            # test first, then the gap, then the training window.
            test_end = fold_end
            test_start = test_end - test_segs

            train_end = test_start - gap_segs
            train_start = train_end - tr_segs

            # Skip the fold when the training window would start before the fold.
            # (train_end > test_start can only happen for a negative gap.)
            if train_start < fold_start or train_end > test_start:
                print(f"[Fold {f_idx+1}] SKIP (insufficient room): "
                      f"fold=({fold_start},{fold_end}) train=({train_start},{train_end}) test=({test_start},{test_end})")
                fold_stats.append(None)
                continue

            train_indices = list(range(train_start, train_end))
            test_indices  = list(range(test_start, test_end))

            n_total = len(train_indices)
            n_val = max(1, int(n_total * VAL_FRAC_IN_TRAIN))
            if n_total - n_val < 1:
                print(f"[Fold {f_idx+1}] SKIP (train too small after val split).")
                fold_stats.append(None)
                continue

            # Validation is the tail of the training window (the part closest
            # to the gap); the rest is used for optimisation.
            real_train_idx = train_indices[:-n_val]
            val_idx        = train_indices[-n_val:]

            # The label scaler sees only the optimisation subset, so no
            # validation/test label statistics leak into training.
            y_train = targets_mmHg[np.array(real_train_idx)]
            scaler = LabelScaler2D(mode="minmax", eps=1e-6).fit(y_train)

            train_loader = DataLoader(Subset(ds, real_train_idx), batch_size=BATCH_SIZE, shuffle=True)
            val_loader   = DataLoader(Subset(ds, val_idx), batch_size=BATCH_SIZE, shuffle=False)
            test_loader  = DataLoader(Subset(ds, test_indices), batch_size=BATCH_SIZE, shuffle=False)

            t0 = time.time()
            model = train_one_model(train_loader, val_loader, scaler)
            stat = eval_mae_sd_mmHg(model, test_loader, scaler)
            elapsed = time.time() - t0

            stat.update({
                "fold": f_idx + 1,
                "train_dur_min": tr_min,
                "train_n": len(real_train_idx),
                "val_n": len(val_idx),
                "test_n": len(test_indices),
                "elapsed_s": float(elapsed),
            })
            fold_stats.append(stat)

            print(f"\n[Fold {f_idx+1}] fold=({fold_start},{fold_end}) | "
                  f"train=({train_start},{train_end}) | test=({test_start},{test_end})")
            print(f"  sizes Train/Val/Test: {len(real_train_idx)}/{len(val_idx)}/{len(test_indices)}")
            print(f"  SBP: MAE={stat['mae_sbp']:.4f} | SD={stat['sd_sbp']:.4f}")
            print(f"  DBP: MAE={stat['mae_dbp']:.4f} | SD={stat['sd_dbp']:.4f}")
            print(f"  elapsed: {elapsed:.1f}s")

            # Release the model before the next fold allocates a new one.
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

        # Only folds that ran and produced finite metrics enter the average.
        valid = [fs for fs in fold_stats if fs is not None and np.isfinite(fs["mae_sbp"])]

        def mean_key(key):
            # Unweighted mean of one metric over the valid folds (NaN if none).
            vals = [v[key] for v in valid]
            return float(np.mean(vals)) if len(vals) else float("nan")

        tr_summary = {
            "valid_folds": len(valid),
            "mae_sbp": mean_key("mae_sbp"),
            "sd_sbp": mean_key("sd_sbp"),
            "mae_dbp": mean_key("mae_dbp"),
            "sd_dbp": mean_key("sd_dbp"),
        }
        patient_summary[tr_min] = tr_summary

        print(f"\n--- TrainDur {tr_min} min SUMMARY (TimeGap={TIME_GAP_MIN}min) ---")
        print(f"ValidFolds: {tr_summary['valid_folds']}/{N_FOLDS}")
        print(f"Avg SBP: MAE={tr_summary['mae_sbp']:.4f} | SD={tr_summary['sd_sbp']:.4f}")
        print(f"Avg DBP: MAE={tr_summary['mae_dbp']:.4f} | SD={tr_summary['sd_dbp']:.4f}")
        print(f"Elapsed for this TrainDur: {time.time() - t_tr0:.1f}s")

    print(f"\n[Patient Done] {os.path.basename(mat_path)} | filtering={filtering} | elapsed: {time.time() - t_global0:.1f}s")
    return patient_summary

def run_phase3_all_patients(pulsedb_dir: str):
    """
    Run the per-patient protocol for every "p*.mat" file, once per filtering mode.

    A patient whose run raises is reported and recorded as skipped; the sweep
    continues with the next patient. After each filtering mode a
    patient-level mean / sample-std table is printed per training duration.

    Args:
        pulsedb_dir: directory searched for files matching "p*.mat".

    Returns:
        (results_by_filter, skipped_by_filter): dicts keyed by the filtering
        flag, holding a list of {"patient", "summary"} records and a list of
        (patient, reason) tuples respectively.

    Raises:
        FileNotFoundError: if the directory contains no matching file.
    """
    patient_files = sorted(glob.glob(os.path.join(pulsedb_dir, "p*.mat")))
    if not patient_files:
        raise FileNotFoundError(f"No .mat files found under: {pulsedb_dir}")

    n_patients = len(patient_files)
    banner = "#" * 80
    rule = "-" * 80

    def safe_mean(arr):
        # Mean that tolerates an empty list.
        return float(np.mean(arr)) if len(arr) else float("nan")

    def safe_std(arr):
        # Sample std (ddof=1); undefined for fewer than two values.
        return float(np.std(arr, ddof=1)) if len(arr) >= 2 else float("nan")

    print("\n" + banner)
    print(f"[RUN ALL] Found {n_patients} patients in: {pulsedb_dir}")
    for path in patient_files:
        print("  -", os.path.basename(path))
    print(banner)

    results_by_filter = {}  # filtering -> list of {"patient", "summary"}
    skipped_by_filter = {}  # filtering -> list of (patient, reason)

    t0_all = time.time()

    for filtering in FILTERING_SWEEP:
        print("\n" + banner)
        print(f"[FILTER SWEEP] filtering={filtering}")
        print(banner)

        all_results, skipped = [], []

        for i, mat_path in enumerate(patient_files, 1):
            patient = os.path.basename(mat_path)
            print("\n" + rule)
            print(f"[{i}/{n_patients}] START {patient} | filtering={filtering}")
            print(rule)

            try:
                summary = run_phase3_one_patient(mat_path, filtering=filtering)
                all_results.append({"patient": patient, "summary": summary})
            except Exception as e:
                # ValueError carries an expected "not enough data" reason;
                # anything else is labelled as unexpected but still skipped.
                if isinstance(e, ValueError):
                    reason = str(e)
                else:
                    reason = f"Unexpected error: {type(e).__name__}: {e}"
                print(f"[SKIP PATIENT] {patient} | filtering={filtering} | reason: {reason}")
                skipped.append((patient, reason))

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

        results_by_filter[filtering] = all_results
        skipped_by_filter[filtering] = skipped

        # ---- FINAL SUMMARY for this filtering mode ----
        print("\n" + banner)
        print(f"[FINAL SUMMARY] filtering={filtering} | patient-level mean/std across usable patients")
        print(f"Used patients: {len(all_results)} / {n_patients}")
        print(banner)

        for tr_min in TRAIN_DUR_SWEEP_MIN:
            # Patients that have a finite result for this training duration.
            candidates = (item["summary"].get(tr_min, None) for item in all_results)
            usable = [g for g in candidates if g is not None and np.isfinite(g["mae_sbp"])]

            sbp_maes = [g["mae_sbp"] for g in usable]
            sbp_sds = [g["sd_sbp"] for g in usable]
            dbp_maes = [g["mae_dbp"] for g in usable]
            dbp_sds = [g["sd_dbp"] for g in usable]

            print(f"\n[TrainDur={tr_min} min] PatientsUsed: {len(usable)}/{len(all_results)}")
            print(f"  SBP(MAE): mean={safe_mean(sbp_maes):.4f} | std={safe_std(sbp_maes):.4f}")
            print(f"  DBP(MAE): mean={safe_mean(dbp_maes):.4f} | std={safe_std(dbp_maes):.4f}")
            print(f"  SBP(err SD): mean={safe_mean(sbp_sds):.4f} | std={safe_std(sbp_sds):.4f}")
            print(f"  DBP(err SD): mean={safe_mean(dbp_sds):.4f} | std={safe_std(dbp_sds):.4f}")

        # Report skipped patients
        if skipped:
            print("\n" + banner)
            print(f"[SKIPPED PATIENTS] filtering={filtering}")
            print(banner)
            for name, why in skipped:
                print(f"- {name}: {why}")

    print(f"\n[ALL DONE] Total elapsed: {time.time() - t0_all:.1f}s")
    return results_by_filter, skipped_by_filter

# ==========================================
# [MAIN]
# ==========================================
# Script entry point: run the full sweep over every patient file in MAT_DIR.
if __name__ == "__main__":
    run_phase3_all_patients(MAT_DIR)


Using Device: cuda

################################################################################
[RUN ALL] Found 10 patients in: /content/drive/MyDrive/Colab Notebooks/PulseDB
  - p001855.mat
  - p004679.mat
  - p004833.mat
  - p009993.mat
  - p030582.mat
  - p030589.mat
  - p030670.mat
  - p040299.mat
  - p041107.mat
  - p043774.mat
################################################################################

################################################################################
[FILTER SWEEP] filtering=False
################################################################################

--------------------------------------------------------------------------------
[1/10] START p001855.mat | filtering=False
--------------------------------------------------------------------------------

[PATIENT] p001855.mat | filtering=False
✅ filtering=False | kept: 2178 / 2178
[Data Ready] total_len=2178

=== PHASE 3 CONFIG ===
N_FOLDS=5 | BlockGap=0min (0 segs)
FoldLen=435 s