In [None]:
from pathlib import Path
import numpy as np
import glob
from scipy.io import loadmat
from scipy.signal import resample

# The project root is taken to be three directory levels above the resolved
# current working directory, so the notebook must be started from a folder
# nested at that depth (parents[2] raises IndexError on a shallower path).
PROJECT_ROOT = Path.cwd().resolve().parents[2]
DATASETS_DIR = PROJECT_ROOT.joinpath("datasets")

# Raw inputs and processed outputs sit side by side under the same dataset folder.
BNCI_RAW_DIR = DATASETS_DIR.joinpath("bnci_dataset", "raw")
BNCI_PROCESSED_DIR = DATASETS_DIR.joinpath("bnci_dataset", "processed")
OUT_PATH = BNCI_PROCESSED_DIR.joinpath("preprocessed_BNCI.npz")

# Create the output folder (and any missing parents) up front; this is a
# no-op when the folder already exists.
BNCI_PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

print("Project Root:", PROJECT_ROOT)
print("Saving to:", OUT_PATH)

In [2]:
# Gather every .mat file in the raw folder; sorting the path strings gives a
# stable processing order from run to run.
mat_files = sorted(glob.glob(str(BNCI_RAW_DIR.joinpath("*.mat"))))

# Stop immediately when there is nothing to process.
if not mat_files:
    raise FileNotFoundError(f"No .mat files found in {BNCI_RAW_DIR}")

print(f"Found {len(mat_files)} BNCI files.")

Found 18 BNCI files.


In [3]:
# Registry shared by every call to map_label(): normalized label text -> integer id.
global_label_map = {}
# Id that the next previously-unseen label will receive.
next_label_id = 0

def normalize_label(x):
    """Turn a raw label into the canonical string used as a registry key.

    Bytes are decoded as UTF-8 (undecodable bytes are dropped). Text is
    stripped of surrounding whitespace and lower-cased. Any other value is
    passed through ``int()`` and rendered as a decimal string, so ``3`` and
    ``3.0`` both become ``"3"``.
    """
    text = x.decode("utf-8", errors="ignore") if isinstance(x, bytes) else x
    if not isinstance(text, str):
        return str(int(text))
    return text.strip().lower()

def map_label(x):
    """Return the integer id for a label, registering it on first sight.

    Ids are handed out sequentially starting at 0, in the order in which
    distinct normalized labels are first encountered. The assignment is
    recorded in the module-level ``global_label_map``.
    """
    global next_label_id
    key = normalize_label(x)
    try:
        return global_label_map[key]
    except KeyError:
        assigned = next_label_id
        global_label_map[key] = assigned
        next_label_id = assigned + 1
        return assigned

In [None]:
def extract_epochs(elem, target_n_times=561, target_sfreq=250.0):
    """Cut one recording into equal-length epochs and map their labels to ids.

    The recording ``elem.X`` (samples x channels) is split into
    ``len(elem.classes)`` contiguous, equally sized segments. Segment ``i``
    is labelled with ``map_label(elem.classes[i])``. Samples left over after
    the integer division are dropped from the end. Each segment is then
    Fourier-resampled along time to ``target_n_times`` samples when its
    length differs.

    NOTE(review): the trial count is taken from ``classes`` and the trials
    are assumed to tile the recording back to back. That only yields real
    trials if ``classes`` holds one entry per trial. If it instead holds the
    list of class names, every recording is chopped into that many arbitrary
    chunks. Confirm against the dataset's documentation; if the struct also
    exposes per-trial onsets/labels, those would be the safer source.

    Parameters
    ----------
    elem : object
        Object exposing ``X`` and ``classes`` attributes.
    target_n_times : int
        Number of time samples per returned epoch.
    target_sfreq : float
        Accepted for interface compatibility; the output length is governed
        by ``target_n_times`` only.

    Returns
    -------
    (epochs, labels)
        ``epochs`` is a float32 array of shape
        (n_trials, n_channels, target_n_times) and ``labels`` an int array of
        length n_trials. ``(None, None)`` is returned when nothing can be
        extracted: ``X`` or ``classes`` missing, ``X`` not 2-D, no trials, or
        fewer samples than trials.
    """
    X = getattr(elem, "X", None)
    classes = getattr(elem, "classes", None)
    if X is None or classes is None:
        return None, None

    X = np.asarray(X, dtype=np.float32)
    classes = np.asarray(classes).ravel()

    # A squeezed or empty recording is not (samples, channels); skip it
    # instead of failing on the shape unpacking below.
    if X.ndim != 2:
        return None, None

    n_samples, n_channels = X.shape
    n_trials = classes.size
    if n_trials == 0:
        return None, None

    samples_per_trial = n_samples // n_trials
    # Fewer samples than trials would produce zero-length epochs, which
    # cannot be resampled.
    if samples_per_trial == 0:
        return None, None

    # Drop the trailing remainder, then view the data as
    # (trial, time, channel) and reorder to (trial, channel, time).
    used = n_trials * samples_per_trial
    epochs = X[:used].reshape(n_trials, samples_per_trial, n_channels).transpose(0, 2, 1)
    labels = np.array([map_label(c) for c in classes], dtype=int)

    if samples_per_trial != target_n_times:
        # One call resamples every trial and channel along the time axis.
        epochs = resample(epochs, target_n_times, axis=2)

    return np.ascontiguousarray(epochs, dtype=np.float32), labels

In [5]:
# Per-recording results, concatenated once every file has been processed.
all_epochs = []
all_labels = []

for mf in mat_files:
    mat = loadmat(mf, squeeze_me=True, struct_as_record=False)
    data_arr = mat.get("data")

    # Files without a `data` variable contribute nothing.
    if data_arr is None:
        continue

    # With squeeze_me=True a single-element `data` can come back as the bare
    # element rather than an array, and a bare element has no `.flat`.
    # atleast_1d makes both cases iterable in the same way.
    for elem in np.atleast_1d(data_arr).flat:
        epochs, labels = extract_epochs(elem)
        # extract_epochs signals "nothing usable here" with (None, None).
        if epochs is None:
            continue

        all_epochs.append(epochs)
        all_labels.append(labels)

if not all_epochs:
    raise RuntimeError("No BNCI epochs extracted.")

# Stack along the trial axis; every entry already shares the same
# (channels, time) shape only if all recordings have the same channel count.
X_bnci = np.concatenate(all_epochs, axis=0)
y_bnci = np.concatenate(all_labels, axis=0)

print("Final BNCI shape:", X_bnci.shape)
print("Label distribution:", dict(zip(*np.unique(y_bnci, return_counts=True))))

Final BNCI shape: (640, 25, 561)
Label distribution: {np.int64(0): np.int64(160), np.int64(1): np.int64(160), np.int64(2): np.int64(160), np.int64(3): np.int64(160)}


In [6]:
# Metadata stored alongside the arrays.
# NOTE(review): `sfreq` is a fixed literal here, not a value derived from the
# recordings - confirm it matches the data actually written.
meta = dict(
    sfreq=250.0,
    dataset="BNCI",
    n_channels=X_bnci.shape[1],
    n_times=X_bnci.shape[2],
    label_map=global_label_map,
)

# `meta` is a plain dict, so NumPy stores it as a pickled object entry;
# readers presumably need np.load(..., allow_pickle=True) to get it back.
np.savez_compressed(
    OUT_PATH,
    X=np.asarray(X_bnci, dtype=np.float32),
    y=np.asarray(y_bnci, dtype=int),
    meta=meta,
)

print("Saved BNCI preprocessed to:", OUT_PATH)

Saved BNCI preprocessed to: C:\Users\roger\Desktop\vs_code\eeg_representation_geometry\datasets\bnci_dataset\processed\preprocessed_BNCI.npz
