In [1]:
import os
import numpy as np
import pandas as pd
from scipy.io import loadmat

# Project root directory. Defaults to the original hard-coded location; set the
# NDM_PROJECT_ROOT environment variable to point at the project on another
# machine without editing this file.
ROOT = os.environ.get("NDM_PROJECT_ROOT", r"C:\Users\ADMIN\Desktop\NDM_Project")

# labels.csv is loaded into df; the loop further down reads its "relpath" and
# "label" columns, joining each relpath onto ROOT.
labels_path = os.path.join(ROOT, "data", "labels.csv")
df = pd.read_csv(labels_path)

def pick_de_signal(mat: dict):
    """Pick the drive-end (DE) time signal out of a loaded .mat dictionary.

    Selection order:
      1. Keys whose upper-cased name contains both "DE" and "TIME" (CWRU files
         usually name this variable ``*_DE_time``). Keys that actually end in
         ``_DE_time`` are tried first so a loosely matching name cannot shadow
         the canonical one. The first candidate holding a 1-D-squeezable
         ndarray wins.
      2. Otherwise, the longest ndarray that squeezes to 1-D among all
         non-dunder keys (first one wins on ties).

    Args:
        mat: Mapping of variable name -> value, e.g. the result of
            ``scipy.io.loadmat``. Keys starting with ``__`` are ignored.

    Returns:
        ``(key, signal)`` where ``signal`` is always a 1-D ``np.ndarray``.

    Raises:
        ValueError: If no key holds an ndarray that squeezes to 1-D.
    """
    keys = [k for k in mat.keys() if not k.startswith("__")]

    def as_vector(value):
        # Squeeze to 1-D, or return None when the value is not an ndarray or
        # does not collapse to exactly one dimension (matrix, scalar, ...).
        if not isinstance(value, np.ndarray):
            return None
        squeezed = value.squeeze()
        return squeezed if squeezed.ndim == 1 else None

    # 1) Prefer keys containing 'DE' and 'time' (the usual CWRU name is *_DE_time).
    cand = [k for k in keys if "DE" in k.upper() and "TIME" in k.upper()]
    # Stable sort: canonical "*_DE_time" names first, original order otherwise.
    cand.sort(key=lambda k: not k.upper().endswith("_DE_TIME"))
    for k in cand:
        x = as_vector(mat[k])
        if x is not None:
            return k, x

    # 2) Fallback: pick the longest 1-D vector.
    best_k, best_x = None, None
    for k in keys:
        x = as_vector(mat[k])
        if x is not None and (best_x is None or x.size > best_x.size):
            best_k, best_x = k, x
    if best_x is None:
        raise ValueError("Cannot find 1D signal in mat keys=" + str(keys))
    return best_k, best_x

# Sanity-check every file listed in labels.csv: load it, pick its signal, and
# print one aligned summary line per row.
for i, row in df.iterrows():
    rel = row["relpath"]
    p = os.path.join(ROOT, rel)
    mat = loadmat(p)
    k, x = pick_de_signal(mat)

    # "ok" means every sample is finite and the signal is non-empty.
    ok = np.all(np.isfinite(x)) and x.size > 0

    # Fields are joined with " | " by print itself.
    print(
        f"[{i:02d}] {row['label']:7s}",
        f"file={os.path.basename(p):15s}",
        f"key={k:25s}",
        f"len={x.size:8d}",
        f"ok={ok}",
        sep=" | ",
    )


[00] H       | file=Normal_1.mat    | key=X098_DE_time              | len=  483903 | ok=True
[01] BF_007  | file=B007_1.mat      | key=X123_DE_time              | len=  487384 | ok=True
[02] BF_014  | file=B014_1.mat      | key=X190_DE_time              | len=  486224 | ok=True
[03] BF_021  | file=B021_1.mat      | key=X227_DE_time              | len=  486804 | ok=True
[04] IRF_007 | file=IR007_1.mat     | key=X110_DE_time              | len=  486224 | ok=True
[05] IRF_014 | file=IR014_1.mat     | key=X217_DE_time              | len=  489125 | ok=True
[06] IRF_021 | file=IR021_1.mat     | key=X214_DE_time              | len=  485063 | ok=True
[07] ORF_007 | file=OR007@6_1.mat   | key=X136_DE_time              | len=  486804 | ok=True
[08] ORF_014 | file=OR014@6_1.mat   | key=X202_DE_time              | len=  484483 | ok=True
[09] ORF_021 | file=OR021@6_1.mat   | key=X239_DE_time              | len=  489125 | ok=True
