In [2]:
# BNCI .mat auto-inspect + epoch extractor + merge with existing preprocessed.npz
# Paste into a NEW notebook. Requires: numpy, scipy, mne
import os, glob, textwrap
import numpy as np
from scipy.io import loadmat
from scipy.signal import resample
import mne, pprint

# ---------- USER: point to folder ----------
BNCI_ROOT = r"C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder"  # <<-- change to your BNCI folder path
PREPRO_PATH = "preprocessed.npz"   # your existing EEGBCI preprocessed file
OUT_PATH = "preprocessed_combined.npz"
# epoch time window (seconds) relative to event onset -- we will adapt epoch length to baseline epoch length
TMIN = 0.0
TMAX = 3.0

# ---------- helper utilities ----------
def list_mat_keys(path):
    try:
        m = loadmat(path, squeeze_me=True, struct_as_record=False)
        return sorted(list(k for k in m.keys() if not k.startswith("__")))
    except Exception as e:
        return f"ERR: {e}"

def safe_get(mat, keys):
    """Try a list of candidate keys and return first present, else None"""
    for k in keys:
        if k in mat:
            return mat[k]
    return None

def struct_field(obj, field):
    # helper to try to access MATLAB struct-like field robustly
    try:
        return getattr(obj, field)
    except Exception:
        try:
            return obj[field]
        except Exception:
            return None

def try_infer_eeg_and_markers(mat_dict):
    """
    Attempts to infer EEG time-series array and marker structure from a loaded .mat dict.
    Returns (data, sfreq, markers) where:
      - data is numpy array shape (n_channels, n_samples)
      - sfreq is float (Hz) or None
      - markers is dict {code: list_of_sample_indices} or dict with keys 'pos' and 'y' as arrays
    """
    # Common candidates
    data_candidates = ['cnt', 'data', 'EEG', 'X', 'signal', 'cnt_x']  # try these in order
    mrk_candidates  = ['mrk', 'marker', 'events', 'event', 'trial', 'pos', 'eventpos']
    sfreq_candidates = ['fs', 'srate', 'sr', 'nfo', 'nfo_fs']

    data = None
    for k in data_candidates:
        if k in mat_dict:
            data = mat_dict[k]
            break
    # If data is a struct, try to extract field 'x' or 'cnt'
    if data is not None and (not isinstance(data, np.ndarray)):
        # sometimes data is MATLAB object with .x or .data
        for fld in ('x', 'X', 'data', 'cnt'):
            try:
                val = getattr(data, fld)
                if isinstance(val, np.ndarray):
                    data = val; break
            except Exception:
                pass

    # Now markers detection
    markers = {}
    # mrk struct with pos & y (common in BNCI)
    if 'mrk' in mat_dict:
        mrk = mat_dict['mrk']
        pos = struct_field(mrk, 'pos') or struct_field(mrk, 'onset') or safe_get(mrk.__dict__ if hasattr(mrk,'__dict__') else mrk, ['pos','onset'])
        y   = struct_field(mrk, 'y')   or struct_field(mrk, 'classlabel') or safe_get(mrk.__dict__ if hasattr(mrk,'__dict__') else mrk, ['y','classlabel'])
        if pos is not None and y is not None:
            # try to build dict of code->samples
            try:
                pos = np.array(pos).astype(int).ravel()
                y_arr = np.array(y).ravel()
                for p,lab in zip(pos, y_arr):
                    markers.setdefault(int(lab), []).append(int(p))
                return np.array(data), None, markers
            except Exception:
                pass

    # try event arrays directly
    for k in ('pos','eventpos','events','event'):
        if k in mat_dict:
            pos = mat_dict[k]
            if isinstance(pos, np.ndarray) and pos.size>0:
                # if there's also labels array
                y_candidates = ['y','classlabel','label','codes']
                y_val = None
                for yc in y_candidates:
                    if yc in mat_dict:
                        y_val = mat_dict[yc]; break
                if y_val is not None:
                    pos = np.array(pos).astype(int).ravel()
                    y_arr = np.array(y_val).ravel()
                    for p,lab in zip(pos, y_arr):
                        markers.setdefault(int(lab), []).append(int(p))
                    return np.array(data), None, markers

    # fallback: some BNCI .mat have 'trial' list where each trial contains data and class
    if 'trial' in mat_dict:
        trials = mat_dict['trial']
        # try to parse trials (list-like)
        try:
            samples = []
            labels = []
            for t in trials:
                if hasattr(t, 'data'):
                    samples.append(np.array(t.data))
                elif isinstance(t, np.ndarray) and t.ndim==2:
                    samples.append(t)
                # labels
                lab = struct_field(t, 'class') or struct_field(t,'label')
                if lab is not None:
                    labels.append(int(lab))
            if len(samples)>0 and len(labels)==len(samples):
                # stitch together and compute marker positions
                # produce continuous concatenation (not always correct) - return as fallback
                data = np.concatenate(samples, axis=1)
                pos = []
                cur = 0
                for s in samples:
                    pos.append(cur); cur += s.shape[1]
                for p,l in zip(pos, labels):
                    markers.setdefault(int(l), []).append(int(p))
                return np.array(data), None, markers
        except Exception:
            pass

    # final fallback: can't parse
    return (np.array(data) if data is not None else None), None, None

# ---------- Inspect .mat files in BNCI_ROOT ----------
mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "*.mat")))
if len(mat_files)==0:
    # also look recursively
    mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "**", "*.mat"), recursive=True))

print(f"Found {len(mat_files)} .mat files (showing up to 30):")
for mf in mat_files[:30]:
    print(" -", os.path.basename(mf))
print("\nInspecting a few files to show keys inside (first 6):\n")
for mf in mat_files[:6]:
    print("FILE:", os.path.basename(mf))
    try:
        keys = list_mat_keys(mf)
        print("  keys:", keys)
    except Exception as e:
        print("  failed to read keys:", e)
    print("-"*40)

# ---------- Load baseline preprocessed to get shape & sfreq ----------
if not os.path.exists(PREPRO_PATH):
    raise FileNotFoundError(f"Baseline preprocessed file '{PREPRO_PATH}' not found. Save it first.")
d = np.load(PREPRO_PATH, allow_pickle=True)
X0 = d['X']; y0 = d['y'].astype(int)
meta0 = {}
if 'meta' in d:
    meta_raw = d['meta']
    try:
        meta0 = meta_raw.item() if meta_raw.shape==() else dict(meta_raw)
    except Exception:
        meta0 = {}
n_chans = X0.shape[1]; n_times_target = X0.shape[2]
sfreq_target = float(meta0.get('sfreq', 250.0))
epoch_duration = n_times_target / sfreq_target
print("\nBaseline epoch shape:", X0.shape, "sfreq_target:", sfreq_target, "epoch_duration(s):", epoch_duration)

# ---------- Try quick extraction on each .mat and report success metrics ----------
extracted = []
for mf in mat_files:
    print("\n=== Trying:", os.path.basename(mf))
    try:
        mat = loadmat(mf, squeeze_me=True, struct_as_record=False)
    except Exception as e:
        print(" loadmat failed:", e); continue
    data, sfreq_guess, markers = try_infer_eeg_and_markers(mat)
    if data is None:
        print("  Could not find continuous EEG array in this .mat (tried common keys). Keys:", list(mat.keys())[:30])
        continue
    print("  Found EEG array with shape:", np.shape(data))
    if markers is None:
        print("  Could not find marker positions in this file automatically.")
        # show keys that might contain markers to help you set EVENT_ID_MAP later
        possible = [k for k in mat.keys() if any(sub in k.lower() for sub in ['mrk','evt','event','pos','trial','y','label'])]
        print("  Possible keys for markers:", possible)
        continue

    # validate channel count match or not
    if data.ndim==2:
        ch, samples = data.shape
    elif data.ndim==1:
        ch = 1; samples = data.shape[0]
        data = data.reshape(1, -1)
    else:
        print("  Unexpected data ndim:", data.ndim); continue

    print("  marker codes found:", sorted(markers.keys())[:20], "counts:", {k: len(v) for k,v in markers.items()})
    if ch != n_chans:
        print(f"  Channel count mismatch: file has {ch} ch but baseline uses {n_chans} ch.")
        # attempt to find channel names in mat to align (best-effort)
        ch_names = None
        for cand in ('clab','chan','labels','label','chans','channel'):
            if cand in mat:
                ch_names = mat[cand]
                break
        if ch_names is not None:
            print("   Found candidate channel names in file:", getattr(ch_names, 'shape', 'ok'))
        else:
            print("   No channel name info found; we'll skip auto-extraction for this file.")
            continue

    # if channels match: compute epochs from markers (resample if needed)
    # convert data to float32 and ensure shape chan x samples
    data = np.array(data, dtype=np.float32)
    # determine file sampling freq if present
    file_sfreq = None
    for sfk in ('fs','srate','sr','sfreq'):
        if sfk in mat:
            file_sfreq = float(mat[sfk])
            break
    if file_sfreq is None and sfreq_guess is not None:
        file_sfreq = sfreq_guess
    if file_sfreq is None:
        file_sfreq = sfreq_target  # fallback
    # resample if file_sfreq != sfreq_target
    if abs(file_sfreq - sfreq_target) > 1e-3:
        # resample each channel to target
        n_target = int(round(data.shape[1] * (sfreq_target / file_sfreq)))
        data = resample(data, n_target, axis=1).astype(np.float32)
        print(f"  Resampled from {file_sfreq}Hz -> {sfreq_target}Hz; new samples {data.shape[1]}")

    # compute epoch sample window
    tmin_samps = int(round(TMIN * sfreq_target))
    tmax_samps = int(round((TMIN + epoch_duration) * sfreq_target))
    n_times = tmax_samps - tmin_samps

    # for each marker code, extract epochs
    epochs_list = []
    labels_list = []
    for code, pos_list in markers.items():
        for p in pos_list:
            st = int(p + tmin_samps)
            ed = int(p + tmax_samps)
            if st < 0 or ed > data.shape[1]:
                continue
            seg = data[:, st:ed]
            if seg.shape[1] != n_times:
                continue
            epochs_list.append(seg)
            labels_list.append(int(code))
    if len(epochs_list)==0:
        print("  No usable epochs extracted (markers may be outside bounds).")
        continue
    epochs_arr = np.stack(epochs_list).astype(np.float32)  # shape (n_epochs_file, ch, n_times)
    print(f"  Extracted {epochs_arr.shape[0]} epochs shape {epochs_arr.shape}")
    extracted.append((mf, epochs_arr, np.array(labels_list, dtype=int)))

# ---------- Summary and next actions ----------
print("\n\n=== Extraction summary ===")
total_ex = sum(e[1].shape[0] for e in extracted)
print("Files with successful extraction:", len(extracted), "total extracted epochs:", total_ex)
for f, arr, labs in extracted:
    print(" -", os.path.basename(f), "->", arr.shape, "labels:", dict(zip(*np.unique(labs, return_counts=True))))

if len(extracted)==0:
    print("\nNo files were auto-extracted. Inspect the keys printed above for a representative .mat and tell me which key contains EEG data and which contains event positions/labels (I can then give a short mapping).")
else:
    # OPTIONAL: combine extracted into a single X_BNCI and y_BNCI
    X_bnci = np.concatenate([arr for (_,arr,_) in extracted], axis=0)
    y_bnci = np.concatenate([labs for (_,_,labs) in extracted], axis=0)
    print("\nCombined BNCI extracted shape:", X_bnci.shape, y_bnci.shape)
    # NOTE: BNCI labels may be non-zero-indexed (e.g., 1/2) -> rebase to 0..C-1
    y_bnci = y_bnci - y_bnci.min()
    print("BNCI label distribution after rebase:", dict(zip(*np.unique(y_bnci, return_counts=True))))
    # Merge with baseline preprocessed
    X_comb = np.concatenate([X0, X_bnci], axis=0)
    y_comb = np.concatenate([y0 - y0.min(), y_bnci], axis=0)  # ensure baseline labels are zero-indexed
    print("Merged shape:", X_comb.shape, y_comb.shape, "saving to:", OUT_PATH)
    meta_comb = {'sfreq': sfreq_target}
    np.savez_compressed(OUT_PATH, X=X_comb.astype(np.float32), y=y_comb.astype(int), meta=meta_comb)
    print("Saved merged dataset to", OUT_PATH)


Found 18 .mat files (showing up to 30):
 - A01E.mat
 - A01T.mat
 - A02E.mat
 - A02T.mat
 - A03E.mat
 - A03T.mat
 - A04E.mat
 - A04T.mat
 - A05E.mat
 - A05T.mat
 - A06E.mat
 - A06T.mat
 - A07E.mat
 - A07T.mat
 - A08E.mat
 - A08T.mat
 - A09E.mat
 - A09T.mat

Inspecting a few files to show keys inside (first 6):

FILE: A01E.mat
  keys: ['data']
----------------------------------------
FILE: A01T.mat
  keys: ['data']
----------------------------------------
FILE: A02E.mat
  keys: ['data']
----------------------------------------
FILE: A02T.mat
  keys: ['data']
----------------------------------------
FILE: A03E.mat
  keys: ['data']
----------------------------------------
FILE: A03T.mat
  keys: ['data']
----------------------------------------

Baseline epoch shape: (30, 64, 561) sfreq_target: 250.0 epoch_duration(s): 2.244

=== Trying: A01E.mat
  Found EEG array with shape: (9,)
  Could not find marker positions in this file automatically.
  Possible keys for markers: []

=== Trying: A01T

In [4]:
# Diagnostic: inspect structure inside first BNCI .mat file (A01E.mat)
from scipy.io import loadmat
import numpy as np, os, glob, pprint

BNCI_ROOT = r"C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder"  # adjust if needed
mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "*.mat")))
if len(mat_files) == 0:
    mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "**", "*.mat"), recursive=True))
print("Found", len(mat_files), "mat files. Using first:", os.path.basename(mat_files[0]))

mf = mat_files[0]
print("Loading:", mf)
mat = loadmat(mf, squeeze_me=True, struct_as_record=False)

print("\nTop-level keys in .mat:")
top_keys = [k for k in mat.keys() if not k.startswith("__")]
print(top_keys)

# Show mat['data'] type and a small preview
if 'data' in mat:
    d = mat['data']
    print("\nmat['data'] type:", type(d))
    try:
        print("ndim:", getattr(d, 'ndim', None), "shape:", getattr(d,'shape', None))
    except Exception:
        pass
    # if numeric array, show min/max and dtype
    if isinstance(d, np.ndarray) and np.issubdtype(d.dtype, np.number):
        print("numeric array info: dtype=", d.dtype, "shape=", d.shape,
              "min/max:", np.min(d), np.max(d))
    else:
        # if struct-like object, print its attributes / fields
        print("\nInspecting attributes/fields of mat['data'] (first-level):")
        try:
            # Try dict-like access
            if hasattr(d, '__dict__'):
                fields = list(d.__dict__.keys())
            else:
                # for numpy.object arrays or matlab structs, use dir() and filter
                fields = [a for a in dir(d) if not a.startswith('_')][:200]
            print(fields)
        except Exception as e:
            print("  (couldn't enumerate fields):", e)

# Try to find likely marker/label fields anywhere in the mat
candidates = ['mrk','mrk_pos','mrk_y','y','pos','classlabel','trial','event','events','eventpos','label','labels','markers']
found = {}
for k in top_keys:
    if k.lower() in candidates:
        found[k] = mat[k]
    # also inspect contents if struct-like
    val = mat[k]
    try:
        # if struct with attributes
        attrs = []
        if hasattr(val, '__dict__'):
            attrs = list(val.__dict__.keys())
        elif isinstance(val, np.ndarray) and val.dtype == object:
            attrs = ['object-array']
        else:
            # show shape/type
            attrs = [f"type={type(val)}, shape={getattr(val,'shape',None)}"]
        if any(c in ''.join(attrs).lower() for c in candidates):
            found[k] = val
    except Exception:
        pass

print("\nCandidate marker/label keys found at top-level (possibly):")
if len(found)==0:
    print("  none discovered automatically. Below is a compact dump of a few keys to help us map:")
    for k in top_keys[:30]:
        v = mat[k]
        try:
            desc = f"type={type(v)}, shape={getattr(v,'shape',None)}"
        except:
            desc = str(type(v))
        print(f"  {k}: {desc}")
else:
    for k,v in found.items():
        print("  ", k, "->", type(v), getattr(v,'shape',None))
        # if small numeric arrays, print contents
        if isinstance(v, np.ndarray) and v.size < 50:
            print("    contents:", v)
        elif hasattr(v, '__dict__'):
            print("    fields:", list(v.__dict__.keys()))

print("\nIf nothing obvious, paste the output here (the top-level keys list and the preview).")


Found 18 mat files. Using first: A01E.mat
Loading: C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder\A01E.mat

Top-level keys in .mat:
['data']

mat['data'] type: <class 'numpy.ndarray'>
ndim: 1 shape: (9,)

Inspecting attributes/fields of mat['data'] (first-level):
['T', 'all', 'any', 'argmax', 'argmin', 'argpartition', 'argsort', 'astype', 'base', 'byteswap', 'choose', 'clip', 'compress', 'conj', 'conjugate', 'copy', 'ctypes', 'cumprod', 'cumsum', 'data', 'device', 'diagonal', 'dot', 'dtype', 'dump', 'dumps', 'fill', 'flags', 'flat', 'flatten', 'getfield', 'imag', 'item', 'itemset', 'itemsize', 'mT', 'max', 'mean', 'min', 'nbytes', 'ndim', 'newbyteorder', 'nonzero', 'partition', 'prod', 'ptp', 'put', 'ravel', 'real', 'repeat', 'reshape', 'resize', 'round', 'searchsorted', 'setfield', 'setflags', 'shape', 'size', 'sort', 'squeeze', 'std', 'strides', 'sum', 'swapaxes', 'take', 'to_device', 'tobytes', 'tofile', 'tolist', 'tostring', 'trace', 'transpose', 'var', 'view']

C

In [6]:
# Inspect internal fields of BNCI .mat 'data' entries (run this cell)
from scipy.io import loadmat
import numpy as np, glob, os, pprint

BNCI_ROOT = r"C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder"  # adjust if needed
mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "*.mat")))
if len(mat_files) == 0:
    mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "**", "*.mat"), recursive=True))
print("Using first file:", mat_files[0])

mf = mat_files[0]
mat = loadmat(mf, squeeze_me=True, struct_as_record=False)
print("Top-level keys:", [k for k in mat.keys() if not k.startswith("__")])

data_arr = mat.get('data', None)
if data_arr is None:
    raise RuntimeError("No 'data' key found in .mat")

print("\nType of mat['data']:", type(data_arr), "shape:", getattr(data_arr, 'shape', None))
# If it's a 1-D array of mat_struct, show info for first element
if isinstance(data_arr, np.ndarray) and data_arr.size>0:
    first = data_arr.flat[0]
    print("\nFirst element type:", type(first))
    # list possible field names
    fields = []
    # mat_struct from scipy typically exposes attributes via .__dict__ or use dir()
    try:
        if hasattr(first, '__dict__') and isinstance(first.__dict__, dict) and len(first.__dict__)>0:
            fields = list(first.__dict__.keys())
        else:
            # fallback - use dir and filter
            fields = [a for a in dir(first) if not a.startswith('_')][:200]
    except Exception as e:
        fields = [a for a in dir(first) if not a.startswith('_')][:200]
    print("\nFields found on first element (count={}):".format(len(fields)))
    pprint.pprint(fields)

    # For each field, try to print small summary (type, shape/len, a small sample)
    print("\nField summaries (first element):")
    for f in fields:
        try:
            val = getattr(first, f)
        except Exception:
            try:
                val = first[f]
            except Exception:
                val = None
        if val is None:
            print(f" - {f}: <unreadable>")
            continue
        t = type(val)
        shape = getattr(val, 'shape', None)
        # basic numeric arrays: print dtype/min/max/first elements
        if isinstance(val, np.ndarray) and np.issubdtype(val.dtype, np.number):
            s = f"dtype={val.dtype}, shape={val.shape}"
            try:
                mn, mx = float(np.min(val)), float(np.max(val))
                sample = val.ravel()[:8].tolist()
                print(f" - {f}: {s}, min/max={mn:.3g}/{mx:.3g}, sample={sample}")
            except Exception:
                print(f" - {f}: {s}, sample unavailable")
        elif isinstance(val, (list, tuple)):
            print(f" - {f}: list/tuple len={len(val)}; sample types/distinct={set(type(x) for x in val) if len(val)<=10 else 'varies'}")
        else:
            # some fields are nested mat_structs; print attributes if present
            try:
                subfields = list(val.__dict__.keys()) if hasattr(val, '__dict__') else None
            except Exception:
                subfields = None
            if subfields:
                print(f" - {f}: matstruct with fields {subfields}")
            else:
                # fallback summary
                desc = getattr(val, 'shape', None) or str(type(val))
                print(f" - {f}: {desc}")

    # Now produce presence summary across all elements
    print("\n\nPresence summary of these fields across all elements in mat['data']:")
    all_fields = set(fields)
    for elem in data_arr.flat:
        try:
            if hasattr(elem, '__dict__') and isinstance(elem.__dict__, dict):
                all_fields.update(elem.__dict__.keys())
            else:
                all_fields.update([a for a in dir(elem) if not a.startswith('_')][:200])
        except Exception:
            pass
    all_fields = sorted(list(all_fields))
    summary = {}
    for f in all_fields:
        cnt = 0
        vals = []
        for elem in data_arr.flat:
            try:
                v = getattr(elem, f, None)
            except Exception:
                try:
                    v = elem[f]
                except Exception:
                    v = None
            if v is not None:
                cnt += 1
                # collect small shape/type info
                try:
                    shape = getattr(v, 'shape', None)
                    t = type(v)
                    vals.append((t, shape))
                except Exception:
                    vals.append((type(v), None))
        summary[f] = (cnt, vals[:3])
    # print compact
    for f in all_fields:
        c, sample_info = summary[f]
        if c>0:
            print(f" - {f}: present in {c}/{data_arr.size} elements; examples: {sample_info}")
else:
    print("mat['data'] is not an ndarray or empty; value:", data_arr)

print("\n\nIf you paste the output above, I'll map the exact fields to 'EEG' and 'markers' and give extraction code.")


Using first file: C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder\A01E.mat
Top-level keys: ['data']

Type of mat['data']: <class 'numpy.ndarray'> shape: (9,)

First element type: <class 'scipy.io.matlab._mio5_params.mat_struct'>

Fields found on first element (count=9):
['_fieldnames',
 'X',
 'trial',
 'y',
 'fs',
 'classes',
 'artifacts',
 'gender',
 'age']

Field summaries (first element):
 - _fieldnames: list/tuple len=8; sample types/distinct={<class 'str'>}
 - X: dtype=float64, shape=(34291, 25), min/max=-88.9/136, sample=[11.23046875, -27.24609375, 6.103515625, 4.8828125, 2.05078125, -2.63671875, 3.02734375, 3.857421875]
 - trial: dtype=uint8, shape=(0,), sample unavailable
 - y: dtype=uint8, shape=(0,), sample unavailable
 - fs: <class 'int'>
 - classes: (4,)
 - artifacts: dtype=uint8, shape=(0,), sample unavailable
 - gender: <class 'str'>
 - age: <class 'int'>


Presence summary of these fields across all elements in mat['data']:
 - X: present in 9/9 elements;

In [8]:
# Small diagnostic: print the useful fields inside each element of mat['data']
from scipy.io import loadmat
import glob, os, numpy as np, pprint

BNCI_ROOT = r"C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder"  # update if needed
mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "*.mat")))
mf = mat_files[0]
print("Inspecting:", mf)
mat = loadmat(mf, squeeze_me=True, struct_as_record=False)
data_arr = mat['data']
print("data array shape:", getattr(data_arr,'shape',None))

def safe_attr(obj, name):
    try:
        return getattr(obj, name)
    except Exception:
        try:
            return obj[name]
        except Exception:
            return None

# Print full details for the first element
first = data_arr.flat[0]
print("\n--- FIRST ELEMENT DETAILS ---")
for fld in ['_fieldnames', 'X', 'fs', 'trial', 'y', 'classes', 'age', 'gender', 'artifacts']:
    v = safe_attr(first, fld)
    print(f"\nField: {fld}")
    if v is None:
        print("  <missing>")
        continue
    print("  type:", type(v))
    try:
        print("  shape/type:", getattr(v,'shape', None), getattr(v,'dtype', None))
    except Exception:
        pass
    # small preview
    if isinstance(v, (list, tuple, np.ndarray)) and np.asarray(v).size<=20:
        print("  preview:", np.asarray(v))
    elif isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
        arr = np.asarray(v)
        print("  preview (first 8):", arr.ravel()[:8])
    else:
        # mat_struct nested - show its attributes
        try:
            sub = v.__dict__ if hasattr(v,'__dict__') else None
            if sub:
                print("  subfields:", list(sub.keys()))
        except Exception:
            pass

# Now summarize presence/len info across all elements
print("\n--- SUMMARY ACROSS ALL ELEMENTS ---")
summary_fields = ['X','fs','trial','y','classes','_fieldnames']
for fld in summary_fields:
    present = 0
    shapes = []
    for elem in data_arr.flat:
        val = safe_attr(elem, fld)
        if val is not None:
            present += 1
            try:
                shapes.append(getattr(val,'shape', None))
            except Exception:
                shapes.append(type(val))
    print(f"{fld}: present in {present}/{data_arr.size} elements; sample shapes/types: {shapes[:5]}")

# Print channel names if available in _fieldnames of first element
fn = safe_attr(first, '_fieldnames')
if fn is not None:
    print("\n_channel names from _fieldnames (first element):")
    try:
        print(np.asarray(fn))
    except Exception:
        print(fn)

print("\nDone. Paste the entire output here and I'll provide the exact extraction code (epochs + labels) to merge BNCI into your preprocessed dataset.")


Inspecting: C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder\A01E.mat
data array shape: (9,)

--- FIRST ELEMENT DETAILS ---

Field: _fieldnames
  type: <class 'list'>
  shape/type: None None
  preview: ['X' 'trial' 'y' 'fs' 'classes' 'artifacts' 'gender' 'age']

Field: X
  type: <class 'numpy.ndarray'>
  shape/type: (34291, 25) float64
  preview (first 8): [ 11.23046875 -27.24609375   6.10351562   4.8828125    2.05078125
  -2.63671875   3.02734375   3.85742188]

Field: fs
  type: <class 'int'>
  shape/type: None None

Field: trial
  type: <class 'numpy.ndarray'>
  shape/type: (0,) uint8
  preview: []

Field: y
  type: <class 'numpy.ndarray'>
  shape/type: (0,) uint8
  preview: []

Field: classes
  type: <class 'numpy.ndarray'>
  shape/type: (4,) object
  preview: ['left hand' 'right hand' 'feet' 'tongue']

Field: age
  type: <class 'int'>
  shape/type: None None

Field: gender
  type: <class 'str'>
  shape/type: None None

Field: artifacts
  type: <class 'numpy.ndarray'

In [11]:
# ===== BNCI extraction cell (handles trial-less 'classes' case) =====
# Paste into your advanced notebook. Requires: numpy, scipy, scipy.io, mne (optional), tqdm
import os, glob, numpy as np
from scipy.io import loadmat
from scipy.signal import resample
from tqdm import tqdm

# ---------- USER SETTINGS ----------
BNCI_ROOT = r"C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder"  # update if needed
BASELINE_PREPRO = "preprocessed.npz"     # your baseline (EEGBCI) preprocessed file
OUT_BNCI = "preprocessed_BNCI.npz"       # will be written
# If baseline epoch length differs from BNCI trial length, this controls how we trim/resample
# We'll infer target epoch length from BASELINE_PREPRO automatically below.

# ---------- helpers ----------
def safe_getattr(o, name):
    try:
        return getattr(o, name)
    except Exception:
        try:
            return o[name]
        except Exception:
            return None

def extract_epochs_from_element(elem, target_n_times, target_sfreq):
    """
    elem: mat_struct element -> we expect elem.X (n_samples x n_ch), elem.fs (scalar), elem.classes or elem.trial/elem.y
    Returns: epochs (n_trials, n_ch, target_n_times) and labels (n_trials,)
    """
    X = safe_getattr(elem, "X")  # X shape (n_samples, n_ch) in this .mat format
    if X is None:
        raise ValueError("No X field found in element")
    X = np.array(X, dtype=np.float32)
    # note: in file X is (n_samples, n_channels) -> we will convert to (n_epochs, n_ch, n_times) later
    file_fs = safe_getattr(elem, "fs") or None
    classes = safe_getattr(elem, "classes")
    trial = safe_getattr(elem, "trial")
    y = safe_getattr(elem, "y")

    n_samples, n_ch = X.shape
    # Case A: trial / y contain onsets or trial-wise arrays (rare here) - handle if present and non-empty
    if isinstance(trial, (list, tuple, np.ndarray)) and np.asarray(trial).size > 0 and isinstance(y, (list, np.ndarray)) and np.asarray(y).size>0:
        # attempt to interpret trial as start indices or list of arrays
        try:
            trial_arr = np.asarray(trial)
            # if trial is list of arrays each trial data: stack them
            if trial_arr.ndim == 1 and trial_arr.size > 0 and np.asarray(trial_arr[0]).ndim == 2:
                epochs = [np.array(t, dtype=np.float32) for t in trial_arr]
                labels = np.asarray(y).astype(int).ravel()
            else:
                # otherwise we don't have direct per-trial arrays here -> fallback to classes below
                epochs = None; labels = None
        except Exception:
            epochs = None; labels = None
    else:
        epochs = None; labels = None

    # Case B: use classes array and split X sequentially into n_trials segments.
    if epochs is None:
        if classes is None:
            # try y as labels (possible)
            if y is not None and np.asarray(y).size>0:
                classes = np.asarray(y).ravel()
        if classes is None:
            # cannot determine trials
            return None, None
        classes = np.asarray(classes).ravel()
        n_trials = len(classes)
        if n_trials <= 0:
            return None, None
        # compute samples per trial (must be integer)
        if n_samples % n_trials != 0:
            # if not divisible, try rounding segments of approx equal size
            samples_per_trial = int(np.floor(n_samples / n_trials))
        else:
            samples_per_trial = n_samples // n_trials
        # extract sequential segments
        epochs = []
        labels = []
        for i in range(n_trials):
            st = i * samples_per_trial
            ed = st + samples_per_trial
            if ed > n_samples:
                break
            seg = X[st:ed, :]  # shape (samples_per_trial, n_ch)
            # transpose to (n_ch, n_times)
            seg = seg.T.astype(np.float32)
            epochs.append(seg)
            labels.append(int(classes[i]))
        epochs = np.stack(epochs, axis=0)  # (n_trials, n_ch, n_times_file)
        labels = np.array(labels, dtype=int)

    # Now we have epochs shape (n_trials, n_ch, n_times_file). We need to resample in time to target_n_times
    n_trials, n_ch, n_times_file = epochs.shape
    if file_fs is None:
        file_fs = float(safe_getattr(elem, "fs") or target_sfreq)
    # If sample count differs from target_n_times, resample each epoch in time axis
    if n_times_file != target_n_times:
        epochs_res = np.zeros((n_trials, n_ch, target_n_times), dtype=np.float32)
        for ti in range(n_trials):
            for ch in range(n_ch):
                epochs_res[ti, ch, :] = resample(epochs[ti, ch, :], target_n_times)
        epochs = epochs_res
    return epochs, labels

# ---------- infer target epoch length & sfreq from baseline preprocessed ----------

if not os.path.exists(BASELINE_PREPRO):
    raise FileNotFoundError(f"Baseline preprocessed file '{BASELINE_PREPRO}' not found. Save baseline first.")

d = np.load(BASELINE_PREPRO, allow_pickle=True)
X0 = d['X']
y0 = d['y'].astype(int)
meta0 = {}
if 'meta' in d:
    meta_raw = d['meta']
    try:
        meta0 = meta_raw.item() if meta_raw.shape==() else dict(meta_raw)
    except Exception:
        meta0 = {}

target_n_ch = X0.shape[1]
target_n_times = X0.shape[2]
target_sfreq = float(meta0.get('sfreq', 250.0))
print("Baseline epoch shape:", X0.shape, "target_sfreq:", target_sfreq)

# ---------- iterate BNCI .mat files and extract by element ----------
mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "*.mat")))
extracted_epochs = []
extracted_labels = []
file_info = []
for mf in mat_files:
    try:
        mat = loadmat(mf, squeeze_me=True, struct_as_record=False)
    except Exception as e:
        print("Failed to load", mf, ":", e); continue
    data_arr = mat.get('data', None)
    if data_arr is None:
        print("No data entry in", mf); continue
    # iterate elements
    for elem in data_arr.flat:
        epochs, labels = extract_epochs_from_element(elem, target_n_times, target_sfreq)
        if epochs is None or labels is None:
            # skip this element
            continue
        # epochs shape (n_trials, n_ch_file, target_n_times)
        n_trials, n_ch_file, _ = epochs.shape
        file_info.append((mf, n_trials, n_ch_file))
        extracted_epochs.append(epochs)
        extracted_labels.append(labels)

# concatenate
if len(extracted_epochs)==0:
    print("No epochs extracted from BNCI files automatically. See file_info:", file_info)
else:
    X_bnci = np.concatenate(extracted_epochs, axis=0)   # (N_epochs_bnci, n_ch_file, target_n_times)
    y_bnci = np.concatenate(extracted_labels, axis=0)
    print("Extracted BNCI epochs shape:", X_bnci.shape, "labels shape:", y_bnci.shape)
    # rebase bnci labels to 0..C-1
    y_bnci = y_bnci - y_bnci.min()
    print("BNCI label counts:", dict(zip(*np.unique(y_bnci, return_counts=True))))

    # Save BNCI-only preprocessed file
    np.savez_compressed(OUT_BNCI, X=X_bnci.astype(np.float32), y=y_bnci.astype(int), meta={'sfreq': target_sfreq})
    print("Saved BNCI preprocessed to", OUT_BNCI)

    # Show channel mismatch with baseline
    print("\nBaseline channels:", target_n_ch, "BNCI file channels:", X_bnci.shape[1])
    if X_bnci.shape[1] != target_n_ch:
        print("Channel count mismatch. To merge with baseline you must either:")
        print("  1) align channel names and pick common subset (best, requires channel names), OR")
        print("  2) retrain on BNCI-only dataset (we saved preprocessed_BNCI.npz), OR")
        print("  3) as quick hack, truncate baseline channels to first N or BNCI channels to first N (not recommended).")
    else:
        print("Channel counts match; you can concatenate with baseline X0 directly and save combined file.")

# =======================================================
# If you want merging (automatic) despite mismatch, uncomment one of the two options below:
# OPTION A (recommended if you have channel name lists): write an alignment routine (I can add it if you paste your baseline channel names)
# OPTION B (quick hack, NOT recommended): truncate baseline or BNCI channels to min(target_n_ch, n_ch_file)
# Example quick-hack merge (uncomment to use):
#
# if len(extracted_epochs)>0:
#     min_ch = min(target_n_ch, X_bnci.shape[1])
#     X0_trunc = X0[:, :min_ch, :]
#     X_bnci_trunc = X_bnci[:, :min_ch, :]
#     X_comb = np.concatenate([X0_trunc, X_bnci_trunc], axis=0)
#     y_comb = np.concatenate([y0 - y0.min(), y_bnci], axis=0)
#     np.savez_compressed("preprocessed_combined_quickhack.npz", X=X_comb.astype(np.float32), y=y_comb.astype(int), meta={'sfreq':target_sfreq})
#     print("Saved quick-hack merged file: preprocessed_combined_quickhack.npz")
#
# =======================================================


Baseline epoch shape: (30, 64, 561) target_sfreq: 250.0


ValueError: invalid literal for int() with base 10: 'left hand'

In [13]:
# ===== Fixed BNCI extraction (handles string class names like "left hand") =====
import os, glob
import numpy as np
from scipy.io import loadmat
from scipy.signal import resample
from tqdm import tqdm

# ---------- USER SETTINGS ----------
BNCI_ROOT = r"C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder"  # update if needed
BASELINE_PREPRO = "preprocessed.npz"     # your baseline (EEGBCI) preprocessed file
OUT_BNCI = "preprocessed_BNCI.npz"       # will be written

# ---------- helpers ----------
def safe_getattr(o, name):
    try:
        return getattr(o, name)
    except Exception:
        try:
            return o[name]
        except Exception:
            return None

def normalize_label(x):
    """Turn a label (str or number) into a normalized string key for mapping."""
    if x is None:
        return None
    if isinstance(x, bytes):
        x = x.decode('utf-8', errors='ignore')
    if isinstance(x, str):
        return x.strip().lower()
    # numeric -> return as string of int
    try:
        return str(int(x))
    except Exception:
        return str(x)

# Global label map across all files (keeps labels consistent)
global_label_map = {}
next_label_id = 0

def map_label_to_int(raw_label):
    global global_label_map, next_label_id
    key = normalize_label(raw_label)
    if key is None:
        return None
    if key not in global_label_map:
        global_label_map[key] = next_label_id
        next_label_id += 1
    return global_label_map[key]

def extract_epochs_from_element(elem, target_n_times, target_sfreq):
    """
    elem: mat_struct element -> expect elem.X (n_samples x n_ch), elem.fs, elem.classes or elem.trial/elem.y
    Returns: epochs (n_trials, n_ch, target_n_times) and labels (n_trials,)
    """
    X = safe_getattr(elem, "X")
    if X is None:
        return None, None
    X = np.array(X, dtype=np.float32)
    file_fs = safe_getattr(elem, "fs") or None
    classes = safe_getattr(elem, "classes")
    trial = safe_getattr(elem, "trial")
    y = safe_getattr(elem, "y")

    n_samples, n_ch = X.shape
    epochs = None; labels = None

    # If trial/y are present and look like per-trial arrays, try to use them
    if isinstance(trial, (list, tuple, np.ndarray)) and np.asarray(trial).size > 0 and isinstance(y, (list, np.ndarray)) and np.asarray(y).size>0:
        try:
            trial_arr = np.asarray(trial)
            if trial_arr.ndim == 1 and trial_arr.size > 0 and np.asarray(trial_arr[0]).ndim == 2:
                # each element is a trial array
                epochs = [np.array(t, dtype=np.float32) for t in trial_arr]
                labels = np.asarray(y).astype(int).ravel()
        except Exception:
            epochs = None; labels = None

    # Fallback: use classes (may be strings)
    if epochs is None:
        if classes is None:
            if y is not None and np.asarray(y).size>0:
                classes = np.asarray(y).ravel()
        if classes is None:
            return None, None
        classes_arr = np.asarray(classes).ravel()
        # If classes are strings (dtype=object or str), map them via normalize->global map
        # If classes are numeric, use them directly but still rebase to 0..C-1 afterwards
        # Build label ints per trial
        label_ints = []
        for lab in classes_arr:
            # if string-like, map via global map
            if isinstance(lab, (str, bytes)) or (isinstance(lab, np.ndarray) and lab.dtype.type is np.str_):
                li = map_label_to_int(lab)
            else:
                # numeric-like (e.g., uint8)
                try:
                    li = int(lab)
                except Exception:
                    li = map_label_to_int(lab)
            label_ints.append(li)
        label_ints = np.array(label_ints, dtype=int)
        # If numeric labels look non-zero-indexed and small, we will rebase later globally.
        n_trials = len(label_ints)
        if n_trials <= 0:
            return None, None
        # split X sequentially into n_trials segments
        if n_samples % n_trials == 0:
            samples_per_trial = n_samples // n_trials
        else:
            samples_per_trial = int(np.floor(n_samples / n_trials))
        trials_list = []
        labels_list = []
        for i in range(n_trials):
            st = i * samples_per_trial
            ed = st + samples_per_trial
            if ed > n_samples: break
            seg = X[st:ed, :].T.astype(np.float32)  # transpose -> (n_ch, n_times_file)
            trials_list.append(seg)
            labels_list.append(label_ints[i])
        if len(trials_list) == 0:
            return None, None
        epochs = np.stack(trials_list, axis=0)  # (n_trials, n_ch, n_times_file)
        labels = np.array(labels_list, dtype=int)

    # Now epochs shape (n_trials, n_ch, n_times_file)
    n_trials, n_ch_file, n_times_file = epochs.shape
    # determine file sampling freq
    file_fs = float(file_fs) if file_fs is not None else target_sfreq
    # resample if needed (in time axis)
    if n_times_file != target_n_times:
        epochs_res = np.zeros((n_trials, n_ch_file, target_n_times), dtype=np.float32)
        for ti in range(n_trials):
            for ch in range(n_ch_file):
                epochs_res[ti, ch, :] = resample(epochs[ti, ch, :], target_n_times)
        epochs = epochs_res

    return epochs, labels

# ---------- infer baseline epoch params ----------
if not os.path.exists(BASELINE_PREPRO):
    raise FileNotFoundError(f"Baseline preprocessed file '{BASELINE_PREPRO}' not found. Save baseline first.")

d = np.load(BASELINE_PREPRO, allow_pickle=True)
X0 = d['X']; y0 = d['y'].astype(int)
meta0 = {}
if 'meta' in d:
    meta_raw = d['meta']
    try:
        meta0 = meta_raw.item() if meta_raw.shape==() else dict(meta_raw)
    except Exception:
        meta0 = {}
target_n_ch = X0.shape[1]
target_n_times = X0.shape[2]
target_sfreq = float(meta0.get('sfreq', 250.0))
print("Baseline epoch shape:", X0.shape, "target_sfreq:", target_sfreq)

# ---------- iterate BNCI .mat files ----------
mat_files = sorted(glob.glob(os.path.join(BNCI_ROOT, "*.mat")))
extracted_epochs = []
extracted_labels = []
file_info = []
for mf in mat_files:
    try:
        mat = loadmat(mf, squeeze_me=True, struct_as_record=False)
    except Exception as e:
        print("Failed to load", mf, ":", e); continue
    data_arr = mat.get('data', None)
    if data_arr is None:
        print("No data entry in", mf); continue
    for elem in data_arr.flat:
        epochs, labels = extract_epochs_from_element(elem, target_n_times, target_sfreq)
        if epochs is None or labels is None:
            continue
        extracted_epochs.append(epochs)
        extracted_labels.append(labels)
        file_info.append((mf, epochs.shape[0], epochs.shape[1]))

# concatenate
if len(extracted_epochs) == 0:
    print("No epochs extracted. file_info:", file_info)
else:
    X_bnci = np.concatenate(extracted_epochs, axis=0)   # (N_epochs_bnci, n_ch_file, target_n_times)
    y_bnci = np.concatenate(extracted_labels, axis=0)
    print("Extracted BNCI epochs shape:", X_bnci.shape, "labels shape:", y_bnci.shape)
    # print and save the global mapping used for textual labels
    print("Global label map (text->int):", global_label_map)
    # Save BNCI-only preprocessed
    np.savez_compressed(OUT_BNCI, X=X_bnci.astype(np.float32), y=y_bnci.astype(int), meta={'sfreq': target_sfreq})
    print("Saved BNCI preprocessed to", OUT_BNCI)
    # channel mismatch info
    print("Baseline channels:", target_n_ch, "BNCI file channels:", X_bnci.shape[1])
    if X_bnci.shape[1] != target_n_ch:
        print("Channel count mismatch detected. To merge safely we should align channel names or pick intersection.")
    else:
        print("Channel counts match; you can concatenate directly.")


Baseline epoch shape: (30, 64, 561) target_sfreq: 250.0
Extracted BNCI epochs shape: (640, 25, 561) labels shape: (640,)
Global label map (text->int): {'left hand': 0, 'right hand': 1, 'feet': 2, 'tongue': 3}
Saved BNCI preprocessed to preprocessed_BNCI.npz
Baseline channels: 64 BNCI file channels: 25
Channel count mismatch detected. To merge safely we should align channel names or pick intersection.


In [15]:
# ===== Channel alignment + merge helper =====
# 1) Tries to extract channel name lists (BNCI .mat and baseline meta)
# 2) If both exist: align by intersection and save preprocessed_combined.npz
# 3) If not: prints options and offers quick-hack truncation (unrecommended)

import os, glob, numpy as np
from scipy.io import loadmat

BASE_PREPRO = "preprocessed.npz"
BNCI_PREPRO = "preprocessed_BNCI.npz"
OUT_COMBINED = "preprocessed_combined.npz"
OUT_QUICK = "preprocessed_combined_quickhack.npz"
BNCI_ROOT = r"C:\Users\roger\Desktop\vs_code\brain_intent_decoding\BNCI_folder"  # update if different

# ---------- load preprocessed files ----------
if not os.path.exists(BASE_PREPRO) or not os.path.exists(BNCI_PREPRO):
    raise FileNotFoundError("Both baseline and BNCI preprocessed files must exist in cwd.")

d0 = np.load(BASE_PREPRO, allow_pickle=True)
X0 = d0['X']; y0 = d0['y'].astype(int); meta0 = {}
if 'meta' in d0:
    mr = d0['meta']; meta0 = mr.item() if hasattr(mr, 'shape') and mr.shape==() else dict(mr)

d1 = np.load(BNCI_PREPRO, allow_pickle=True)
X1 = d1['X']; y1 = d1['y'].astype(int); meta1 = {}
if 'meta' in d1:
    mr = d1['meta']; meta1 = mr.item() if hasattr(mr, 'shape') and mr.shape==() else dict(mr)

print("Baseline shape:", X0.shape, "BNCI shape:", X1.shape)
print("Baseline meta keys:", list(meta0.keys()), "BNCI meta keys:", list(meta1.keys()))

# ---------- attempt to discover BNCI channel names from .mat files ----------
def try_get_bnci_ch_names(mat_folder):
    names = None
    files = sorted(glob.glob(os.path.join(mat_folder, "*.mat")))
    for f in files:
        try:
            m = loadmat(f, squeeze_me=True, struct_as_record=False)
            if 'data' in m:
                data = m['data'].flat[0]  # first element likely representative
                # common fields that might contain channel names: 'clab','chan','labels','label','_fieldnames','clab'
                for cand in ('clab','chan','chanlocs','labels','label','_fieldnames','ch_names'):
                    val = getattr(data, cand, None) if hasattr(data, cand) else (m['data'].flat[0].__dict__.get(cand) if hasattr(m['data'].flat[0], '__dict__') and cand in m['data'].flat[0].__dict__ else None)
                    if val is not None:
                        try:
                            arr = np.array(val)
                            # only accept plausible channel lists (len between 10 and 128)
                            if arr.size > 5 and arr.size < 200:
                                names = [str(x).strip() for x in arr.ravel().tolist()]
                                return names
                        except Exception:
                            pass
        except Exception:
            continue
    return None

bnci_ch_names = try_get_bnci_ch_names(BNCI_ROOT)
baseline_ch_names = None
# try common meta keys
for k in ('ch_names','channels','channel_names','chan_names'):
    if k in meta0:
        baseline_ch_names = list(meta0[k])
        break

print("Discovered BNCI ch names:", bool(bnci_ch_names), "Baseline ch names in meta:", bool(baseline_ch_names))
if bnci_ch_names:
    print("Example BNCI names (first 20):", bnci_ch_names[:20])
if baseline_ch_names:
    print("Example baseline names (first 20):", baseline_ch_names[:20])

# ---------- If both have names, align by intersection ----------
if bnci_ch_names and baseline_ch_names:
    set0 = [n.lower() for n in baseline_ch_names]
    set1 = [n.lower() for n in bnci_ch_names]
    common = [c for c in baseline_ch_names if c.lower() in set1]
    if len(common) == 0:
        print("No overlapping channel names found between datasets despite names existing.")
    else:
        print("Found", len(common), "common channel names. Aligning and saving merged dataset.")
        # reorder baseline and bnci to common ordering
        idx0 = [i for i,n in enumerate(baseline_ch_names) if n in common]
        idx1 = [bnci_ch_names.index(n) for n in common]
        X0_al = X0[:, idx0, :]; X1_al = X1[:, idx1, :]
        y0_adj = y0 - y0.min()
        # concat
        X_comb = np.concatenate([X0_al, X1_al], axis=0)
        y_comb = np.concatenate([y0_adj, y1], axis=0)
        np.savez_compressed(OUT_COMBINED, X=X_comb.astype(np.float32), y=y_comb.astype(int), meta={'sfreq': float(meta0.get('sfreq', meta1.get('sfreq',250.0))), 'ch_names': common})
        print("Saved aligned combined dataset to", OUT_COMBINED, "with shape", X_comb.shape)
        raise SystemExit("Done - merged with aligned channel names. Use OUT_COMBINED for experiments.")

# ---------- If we reach here: no usable channel-name alignment found ----------
print("\nNo automatic channel-name alignment possible. Options:")

print("\nOPTION 1 (RECOMMENDED): Do experiments separately:\n - run benchmarks on BNCI-only (preprocessed_BNCI.npz) and baseline-only (preprocessed.npz).\n   BNCI has 640 epochs (good for deep models); baseline is tiny (30 epochs) - you'll usually train on BNCI and evaluate there.\n   I will provide a BNCI-only benchmark cell next if you want.\n")

print("OPTION 2 (QUICK-HACK, NOT RECOMMENDED): Truncate to min channels and merge anyway.")
print("  This throws away channels from the larger dataset and may hurt performance. Use only to prototype quickly.\n")
print("To create quick-hack merged file (uncomment and run the small block below), it will keep the first min_ch channels from each dataset:")

min_ch = min(X0.shape[1], X1.shape[1])
print(f"  quick-hack min_ch = {min_ch} (baseline has {X0.shape[1]}, BNCI has {X1.shape[1]})")
print("\nQuick-hack merge code (copy-paste to run if you accept the tradeoff):\n")
print(f"""
# QUICK-HACK MERGE - USE IF YOU ACCEPT CHANNEL TRUNCATION
import numpy as np
d0 = np.load("{BASE_PREPRO}", allow_pickle=True); d1 = np.load("{BNCI_PREPRO}", allow_pickle=True)
X0 = d0['X']; y0 = d0['y'].astype(int); X1 = d1['X']; y1 = d1['y'].astype(int)
min_ch = {min_ch}
X0_trunc = X0[:, :min_ch, :]; X1_trunc = X1[:, :min_ch, :]
X_comb = np.concatenate([X0_trunc, X1_trunc], axis=0)
y_comb = np.concatenate([y0 - y0.min(), y1], axis=0)
np.savez_compressed("{OUT_QUICK}", X=X_comb.astype(np.float32), y=y_comb.astype(int), meta={{'sfreq': {float(meta0.get('sfreq', meta1.get('sfreq',250.0)))}}})
print("Saved quick-hack merged to {OUT_QUICK} with shape", X_comb.shape)
""")

print("\nTell me which option you choose:\n - 'separate'  -> I will give BNCI-only benchmark cell next (recommended)\n - 'quickhack' -> I will run the quick-hack merge code for you (you accept channel truncation)\n - OR if you have baseline channel names available, paste them here and I will create a proper channel-alignment merge.")


Baseline shape: (30, 64, 561) BNCI shape: (640, 25, 561)
Baseline meta keys: [] BNCI meta keys: ['sfreq']
Discovered BNCI ch names: True Baseline ch names in meta: False
Example BNCI names (first 20): ['X', 'trial', 'y', 'fs', 'classes', 'artifacts', 'gender', 'age']

No automatic channel-name alignment possible. Options:

OPTION 1 (RECOMMENDED): Do experiments separately:
 - run benchmarks on BNCI-only (preprocessed_BNCI.npz) and baseline-only (preprocessed.npz).
   BNCI has 640 epochs (good for deep models); baseline is tiny (30 epochs) - you'll usually train on BNCI and evaluate there.
   I will provide a BNCI-only benchmark cell next if you want.

OPTION 2 (QUICK-HACK, NOT RECOMMENDED): Truncate to min channels and merge anyway.
  This throws away channels from the larger dataset and may hurt performance. Use only to prototype quickly.

To create quick-hack merged file (uncomment and run the small block below), it will keep the first min_ch channels from each dataset:
  quick-hack 