In [8]:
import os
import numpy as np
import h5py
import pandas as pd
from scipy.signal import resample
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [9]:
import mne
from mne.preprocessing import ICA
try:
    from mne_icalabel import label_components
except Exception:
    label_components = None

In [10]:
SAMPLE_RATE = 200  # fs
# SAMPLE_LEN = 1.0   # sample seconds
# OVERLAPPING = 0.8  # overlapping seconds
sub_folder_path = str(SAMPLE_RATE) + 'Hz'
sub_folder_path

'200Hz'

In [11]:
## Load participants.tsv file
# root dirNencki-Symfonia EEG
root = "NSERP/"
participants_path = os.path.join(root, "participants.tsv")
participants = pd.read_csv(participants_path, sep="\t")
participants

Unnamed: 0,participant_id,sex,age,medication use,medication type,caffeine uptake,stress,rest,menstrual cycle,UMACL/UWIST Niezadowolony Dissatisfied,...,ARSQ_41,ARSQ_42,ARSQ_43,ARSQ_44,ARSQ_45,ARSQ_46,ARSQ_47,ARSQ_48,ARSQ_49,ARSQ_50
0,sub-01,F,22,1,ethinylestradiolum/ drospirenonum,0,2,4,10.0,4,...,1,2,4,1,2,3,4,4,2,5
1,sub-02,F,21,0,,1,5,3,21.0,3,...,1,1,1,1,1,4,1,3,1,5
2,sub-03,M,27,0,,0,3,5,,3,...,1,1,5,1,1,3,2,2,2,5
3,sub-04,M,26,0,,0,3,3,,3,...,1,1,1,1,1,3,3,3,3,5
4,sub-05,M,25,0,,0,4,1,,3,...,1,1,1,1,4,4,2,2,1,5
5,sub-06,M,26,0,,0,4,3,,3,...,1,1,1,3,2,1,3,1,1,4
6,sub-07,M,25,0,,0,1,3,,2,...,1,2,1,1,1,4,5,5,2,5
7,sub-08,M,33,1,levothyroxinum natricum,1,2,4,,4,...,1,1,4,1,1,2,3,2,2,5
8,sub-09,F,20,0,,0,4,2,20.0,3,...,2,1,1,1,1,3,1,1,1,5
9,sub-10,F,20,0,,0,4,3,16.0,4,...,1,1,1,1,1,4,3,1,2,5


In [12]:
# Test for bad channels, sampling freq and shape
# The data is BrainVision format (Brain Products) not EEGLAB .set/.fdt format

bad_channel_list, sampling_freq_list, data_shape_list = [], [], []

# Loop through each subject folder
for sub in os.listdir(root):
    if 'sub-' in sub:
        sub_path = os.path.join(root, sub, 'eeg')
        if not os.path.exists(sub_path):
            print(f"❌ Missing folder: {sub_path}")
            continue
        files = os.listdir(sub_path)
        # Look for the .vhdr file (BrainVision header file)
        for file in files:
            if file.endswith('.vhdr'):
                file_path = os.path.join(sub_path, file)
                print(f"✅ Reading: {file_path}")
                raw = mne.io.read_raw_brainvision(file_path, preload=True)

                # Collect bad channels
                bad_channel_list.append(raw.info['bads'])
                # Collect sampling frequency
                sampling_freq_list.append(raw.info['sfreq'])
                # Collect EEG data shape
                data_shape_list.append(raw.get_data().shape)


✅ Reading: NSERP/sub-01\eeg\sub-01_task-msit_eeg.vhdr
Extracting parameters from NSERP/sub-01\eeg\sub-01_task-msit_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 1437859  =      0.000 ...  1437.859 secs...
✅ Reading: NSERP/sub-01\eeg\sub-01_task-oddball_eeg.vhdr
Extracting parameters from NSERP/sub-01\eeg\sub-01_task-oddball_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 1371319  =      0.000 ...  1371.319 secs...
✅ Reading: NSERP/sub-01\eeg\sub-01_task-rest_eeg.vhdr
Extracting parameters from NSERP/sub-01\eeg\sub-01_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 732699  =      0.000 ...   732.699 secs...
✅ Reading: NSERP/sub-01\eeg\sub-01_task-srt_eeg.vhdr
Extracting parameters from NSERP/sub-01\eeg\sub-01_task-srt_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 428819  =      0.000 ...   428.819 secs...
✅ Reading: NSERP/sub-02\eeg\sub-02_task-msit_eeg.vhdr
Extracting parameters from NSERP/sub-02\eeg\sub-02_task-msit_eeg.vhd

In [13]:
from collections import Counter

print(bad_channel_list)
print(data_shape_list[0])
print("Channel number counter:", Counter(i[0] for i in data_shape_list))
print("Sampling rate counter:", Counter(sampling_freq_list))

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
(127, 1437860)
Channel number counter: Counter({127: 167})
Sampling rate counter: Counter({1000.0: 167})


In [14]:
# Initialize common channel list
common_channels = []

# Loop through subject folders
for sub in os.listdir(root):
    if 'sub-' in sub:
        sub_path = os.path.join(root, sub, 'eeg/')
        for file in os.listdir(sub_path):
            # Change: look for BrainVision .vhdr files
            if file.endswith('.vhdr'):
                file_path = os.path.join(sub_path, file)
                raw = mne.io.read_raw_brainvision(file_path, preload=True)
                current_channels = set(raw.info['ch_names'])

                if not common_channels:
                    common_channels = current_channels
                else:
                    common_channels &= current_channels  # Intersection

# Convert set to list
common_channels = list(common_channels)
# Print result
print(common_channels)
print("Common channels number: ", len(common_channels))

Extracting parameters from NSERP/sub-01\eeg/sub-01_task-msit_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 1437859  =      0.000 ...  1437.859 secs...
Extracting parameters from NSERP/sub-01\eeg/sub-01_task-oddball_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 1371319  =      0.000 ...  1371.319 secs...
Extracting parameters from NSERP/sub-01\eeg/sub-01_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 732699  =      0.000 ...   732.699 secs...
Extracting parameters from NSERP/sub-01\eeg/sub-01_task-srt_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 428819  =      0.000 ...   428.819 secs...
Extracting parameters from NSERP/sub-02\eeg/sub-02_task-msit_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 1308539  =      0.000 ...  1308.539 secs...
Extracting parameters from NSERP/sub-02\eeg/sub-02_task-oddball_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 1345819  =      0.000 ...  1345.819 secs...
Extract

In [41]:
MSIT_feature_path = 'Processed/' + sub_folder_path + '/NSERP-MSIT/Feature'
if not os.path.exists(MSIT_feature_path):
    os.makedirs(MSIT_feature_path)
    
MSIT_label_path = 'Processed/' + sub_folder_path + '/NSERP-MSIT/Label'
if not os.path.exists(MSIT_label_path):
    os.makedirs(MSIT_label_path)
    
ODD_feature_path = 'Processed/' + sub_folder_path + '/NSERP-ODD/Feature'
if not os.path.exists(ODD_feature_path):
    os.makedirs(ODD_feature_path)
    
ODD_label_path = 'Processed/' + sub_folder_path + '/NSERP-ODD/Label'
if not os.path.exists(ODD_label_path):
    os.makedirs(ODD_label_path)

In [47]:
def data_preprocessing(
    raw: mne.io.Raw,
    common_channels: list,
    sample_rate: int = 250,
    notch_freq: float = 60.0,
    l_freq: float = 0.5,
    h_freq: float = 40.0,
    do_bad_interp: bool = True,
    verbose: bool = True,
):
    """
    Preprocessing steps ：
      1) choose common channels and reorder
      2) Set Montage 
      3) 60 Hz Notch（before band pass）
      4) bandpass filter（default 0.5–40 Hz）
      5) interpolate bad channels（if do_bad_interp is True）
      6) re-reference to average
      7) ICA（on 1 Hz high-pass filtered copy）
      8) downsample to 250 Hz
    """

    # Remove 'boundary' or 'segment' events before anything else
    # Events like "boundary" or "new segment" inserted during file conversion are not real trials and must be excluded from annotations before epoching.
    raw.annotations.delete([
        i for i, desc in enumerate(raw.annotations.description)
        if 'boundary' in desc.lower() or 'segment' in desc.lower()
    ])
    if verbose:
        print("✔ Cleaned: Removed 'boundary' or 'segment' annotations")

    # 1. select common channels and reorder to given order
    keep = [ch for ch in common_channels if ch in raw.ch_names]
    raw.pick_channels(keep)
    raw.reorder_channels(keep)
    if verbose:
        print(f"✔ Step 2: Picked common channels ({len(keep)}): {keep}")
        
    # 2. Set Montage
    # List of non-standard channels not recognized by 'standard_1020' montage
    """nonstandard_channels = [
        'FCC4h', 'CPP4h', 'POO10h', 'CPP3h', 'CPP5h', 'AFF1h', 'TTP8h',
        'FCC5h', 'FFT9h', 'PPO2h', 'PPO6h', 'FFC3h', 'PPO9h', 'PPO10h',
        'FFC6h', 'CCP2h', 'FTT7h', 'CCP3h', 'CPP2h', 'FFT7h', 'FFC2h',
        'FCC2h', 'AFF5h', 'TPP9h', 'FFT10h', 'PPO1h', 'POO2', 'FFC4h',
        'FTT10h', 'FTT9h', 'PPO5h', 'POO9h', 'FTT8h', 'OI1h', 'CPP1h',
        'FCC3h', 'FFT8h', 'CCP1h', 'FFC5h', 'TPP7h', 'FCC1h', 'POO1',
        'TTP7h', 'AFF2h', 'OI2h', 'CCP5h', 'TPP10h', 'CCP4h', 'FCC6h',
        'CPP6h', 'AFF6h', 'TPP8h','TP9 LEFT EAR', 'FFC1h', 'CCP6h', 'AFp1', 'AFp2', 'TP10 RIGHT EAR'
    ]"""
    
    nonstandard_channels = [
        'TP9 LEFT EAR', 'TP10 RIGHT EAR', 'O9', 'O10'
    ]

    # Drop non-standard channels (if they exist in the dataset)
    raw.drop_channels([ch for ch in nonstandard_channels if ch in raw.ch_names])

    # Set standard 10-20 montage and ignore any channels that are missing positions
    raw.set_montage('standard_1005')
        
    # 3. Notch
    if notch_freq is not None:
        raw.notch_filter(freqs=[notch_freq], picks="eeg", verbose=False)
        if verbose:
            print(f"✔ Step 3: Notch @ {notch_freq} Hz")
        
    # 4. Bandpass Filter (0.5–40 Hz)
    raw.filter(l_freq=l_freq, h_freq=h_freq, picks="eeg", verbose=False)
    if verbose:
        print(f"✔ Step 4: Band-pass {l_freq}–{h_freq} Hz")
        
    # 5. Interpolate bad channels
    if do_bad_interp and raw.info.get("bads"):
        raw.interpolate_bads(reset_bads=True, verbose=False)
        if verbose:
            print(f"✔ Step 5: Interpolated bads: {raw.info.get('bads', [])}")
    else:
        if verbose:
            print("ℹ Step 5: No bads to interpolate (set raw.info['bads'] first if needed)")
            
    # 6) Re-reference to average
    raw.set_eeg_reference("average", verbose=False)
    if verbose:
        print("✔ Step 6: Average reference")

    print()
    
    # 7) ICA (fit ICLabel on 1 Hz high-pass filtered copy, then apply to original)
    raw_for_ica = raw.copy().filter(l_freq=1.0, h_freq=None, picks="eeg", verbose=False)
    ica = ICA(n_components=None, method="fastica", random_state=97, max_iter="auto")
    ica.fit(raw_for_ica)

    excluded = []
    if 'label_components' in globals() and label_components is not None:
        try:
            ic_labels = label_components(raw_for_ica, ica, method="iclabel")
            labels = ic_labels["labels"]
            probs = ic_labels["y_pred_proba"]
            thresholds = {
                "eye blink": 0.7,
                "muscle artifact": 0.6,
                "heart beat": 0.5,
                "line noise": 0.8,
                "channel noise": 0.9,
            }
            for i, lab in enumerate(labels):
                if lab in thresholds:
                    p = probs[i].max() if probs is not None else 1.0
                    if p >= thresholds[lab]:
                        excluded.append(i)
        except Exception as e:
            if verbose:
                print(f"⚠ ICLabel failed ({e}). Skipping auto exclusion.")
    else:
        if verbose:
            print("ℹ ICLabel not available; fitted ICA but no auto component exclusion.")

    if excluded:
        ica.exclude = sorted(set(excluded))
        raw = ica.apply(raw.copy())
        if verbose:
            print(f"✔ Step 7: ICA applied. Excluded comps: {ica.exclude}")
    else:
        if verbose:
            print("ℹ Step 7: No ICA components excluded.")

    # 8) downsample to 250 Hz
    if raw.info["sfreq"] != sample_rate:
        raw.resample(sample_rate, npad="auto", verbose=False)
    if verbose:
        print(f"✔ Step 8: Resampled to {sample_rate} Hz")
        
    return raw


In [43]:
mapping_dict = {
    # SRT: only 5; 
    # Oddball: 5,6,7 for standard,target,deviant; 
    # MSIT: 5,6,7,8 for F0, FS, 00, S0 according to paper
    5:0, 6:1, 7:2, 8:3
}


def epoch_and_make_xy(
    raw: mne.io.Raw,
    events_tsv_path: str,
    tmin: float = -0.5,
    tmax: float = 1.5,
    baseline=(-0.5, 0),
    task_id: int = 1,
    subject_id: int = 1,
):

    # Read and parse events.tsv
    ev = pd.read_csv(events_tsv_path, sep="\t")
    ev = ev[ev["event_type"].astype(str).str.match(r"^S\s*\d+$")].copy()
    ev["code"] = ev["event_type"].str.extract(r"(\d+)").astype(int)

    # Select stimulus events based on task type
    stim = ev[(ev["code"].isin([5, 6, 7, 8])) & (ev["trial_type"] == "stimulus")].reset_index(drop=True)
    labels = stim["code"].map(mapping_dict).astype(int)
    labels = labels.values

    sfreq = raw.info["sfreq"]
    print(f"Current sampling frequency: {sfreq} Hz")

    # Convert stimulus events to sample indices
    onset_samples = np.round(stim["onset"].values * sfreq).astype(int)

    # Check epoch time boundaries to avoid exceeding raw data limits
    min_sample = int(-tmin * sfreq)
    max_sample = int(len(raw) - tmax * sfreq)
    valid_mask = (onset_samples >= min_sample) & (onset_samples <= max_sample)
    onset_samples = onset_samples[valid_mask]
    labels = labels[valid_mask]
    events = np.c_[onset_samples, np.zeros_like(onset_samples), np.ones_like(onset_samples).astype(int)]

    # Extract epochs
    picks = mne.pick_types(raw.info, eeg=True, eog=False, exclude="bads")
    epochs = mne.Epochs(
        raw, events, event_id=dict(stim=1),
        tmin=tmin, tmax=tmax,
        baseline=baseline, picks=picks,
        proj=False, preload=True, reject=None, verbose=False
    )

    # Get indices of valid trials
    good_idx = epochs.selection  # Map indices back to positions in stim/labels
    labels = labels[good_idx]    # Trim labels to match EEG epochs

    # Get EEG data (N, C, T)
    target_len = int((tmax - tmin) * sfreq)
    data = epochs.get_data()
    if data.shape[-1] > target_len:
        data = data[..., :target_len]
    elif data.shape[-1] < target_len:
        pad = target_len - data.shape[-1]
        data = np.pad(data, ((0, 0), (0, 0), (0, pad)), mode="edge")

    # Transpose to (N, T, C)
    X = np.transpose(data, (0, 2, 1))

    # Construct y
    y = np.column_stack([
        np.full_like(labels, task_id),
        labels,
        np.full_like(labels, subject_id),
    ])

    return X, y


In [48]:
import re
import os
task_id = 0
def get_subject_id(path: str) -> int:
    # match string like "sub-xxx" 
    match = re.search(r"sub-(\d+)", path)
    if match:
        return int(match.group(1).lstrip("0"))  
    else:
        raise ValueError(f"No subject_id found in path: {path}")
    
    
for sub in os.listdir(root):
    if 'sub-' in sub:
        sub_path = os.path.join(root, sub, 'eeg/')
        # print(sub_path)
        subject_id = get_subject_id(sub_path)
        
        for file in os.listdir(sub_path):
            if 'msit_eeg.vhdr' in file or 'oddball_eeg.vhdr' in file or "srt_eeg.vhdr" in file: 
                # check task
                # only use visual task here
                if "msit_eeg.vhdr" in file:
                    task_id = 0
                    feature_path = MSIT_feature_path
                    label_path = MSIT_label_path
                if "oddball_eeg.vhdr" in file: 
                    task_id = 1
                    feature_path = ODD_feature_path
                    label_path = ODD_label_path
                if "srt_eeg.vhdr" in file: 
                    continue
                # load eeg data and preprocess
                print(sub_path)
                
                vhdr_path = os.path.join(sub_path, file)
                events_file = file.split(".")[0].split("_")[0] + "_" + file.split(".")[0].split("_")[1] + "_events.tsv"
                events_file_path = os.path.join(sub_path, events_file)
                
                print(vhdr_path)
                raw = mne.io.read_raw_brainvision(vhdr_path, preload=True)                
                raw.pick_types(eeg=True)   # only keep EEG channels
                raw = data_preprocessing(raw, common_channels, SAMPLE_RATE, notch_freq=50, l_freq=0.5, h_freq=40, verbose=True)
                print()
                
                # segment and make X, y
                
                print(f"Start epoching file {file} making X, y...")
                X, y = epoch_and_make_xy(
                    raw, events_file_path,
                    tmin=-0.5, tmax=1.0, baseline=(-0.5, 0),  #  -200ms - 800ms
                    task_id=task_id, subject_id=subject_id, 
                )
                print(f"File {file} epoch into trial X shape: {X.shape}, y shape: {y.shape}")

                # save X, y to npy files
                np.save(feature_path + '/feature_{:03d}.npy'.format(subject_id), X)
                np.save(label_path + '/label_{:03d}.npy'.format(subject_id), y)
                print("------------------------------------------------\n")


NSERP/sub-41\eeg/
NSERP/sub-41\eeg/sub-41_task-msit_eeg.vhdr
Extracting parameters from NSERP/sub-41\eeg/sub-41_task-msit_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 1373839  =      0.000 ...  1373.839 secs...
✔ Cleaned: Removed 'boundary' or 'segment' annotations
✔ Step 2: Picked common channels (127): ['AFz', 'AF3', 'TTP8h', 'FFT7h', 'AFp1', 'CPP3h', 'F5', 'POz', 'CPP5h', 'FTT7h', 'PO10', 'FT9', 'P10', 'CCP4h', 'P3', 'POO9h', 'AF8', 'OI1h', 'FCC1h', 'FC2', 'C5', 'PPO6h', 'C3', 'CPP1h', 'Fz', 'P5', 'PO9', 'Fp2', 'FCC6h', 'FFC2h', 'FT10', 'P1', 'FCC4h', 'FCC3h', 'F6', 'C1', 'AFF5h', 'CP3', 'TPP8h', 'CP6', 'PO8', 'CP5', 'AF4', 'TPP7h', 'FFC4h', 'FTT8h', 'PPO9h', 'F8', 'POO1', 'P7', 'AFF1h', 'TP10 RIGHT EAR', 'O1', 'P2', 'F10', 'CP4', 'T8', 'FFT9h', 'Cz', 'PPO2h', 'POO10h', 'FTT10h', 'O10', 'FTT9h', 'AFF6h', 'TP8', 'TTP7h', 'Oz', 'TPP9h', 'CCP1h', 'FFC6h', 'PO7', 'CCP5h', 'FCC5h', 'CP1', 'TPP10h', 'FC4', 'Fp1', 'FC1', 'P8', 'F7', 'FFC1h', 'P6', 'PPO1h', 'F3', 'FT7', 'OI2h

## Load and check the processed data

In [51]:
# Test the saved npy file
# example
feature_path = MSIT_feature_path   
label_path = MSIT_label_path

print(f"Loading MIST+ trials:\n")
total_samples = 0
for feature_file, label_file in zip(os.listdir(feature_path), os.listdir(label_path)):
    sub_id = int(re.search(r'\d+', feature_file).group())
    feature_file_path = os.path.join(feature_path, feature_file)
    label_file_path = os.path.join(label_path, label_file)
    X = np.load(feature_file_path)
    y = np.load(label_file_path)
    print(f"Subject {sub_id}: X shape: {X.shape}, y shape: {y.shape}")
    if X.shape[0] != y.shape[0]:
        raise(f"Subject {sub_id} data and label length mismatch: " 
                f"{X.shape[0]} vs {y.shape[0]}")
    total_samples += np.load(feature_file_path).shape[0]
    sub_id += 1
print("\nTotal number of trials:", total_samples)

Loading MIST+ trials:

Subject 1: X shape: (406, 300, 123), y shape: (406, 3)
Subject 2: X shape: (397, 300, 123), y shape: (397, 3)
Subject 3: X shape: (402, 300, 123), y shape: (402, 3)
Subject 4: X shape: (403, 300, 123), y shape: (403, 3)
Subject 5: X shape: (406, 300, 123), y shape: (406, 3)
Subject 6: X shape: (393, 300, 123), y shape: (393, 3)
Subject 7: X shape: (397, 300, 123), y shape: (397, 3)
Subject 8: X shape: (404, 300, 123), y shape: (404, 3)
Subject 9: X shape: (386, 300, 123), y shape: (386, 3)
Subject 10: X shape: (400, 300, 123), y shape: (400, 3)
Subject 11: X shape: (401, 300, 123), y shape: (401, 3)
Subject 12: X shape: (393, 300, 123), y shape: (393, 3)
Subject 13: X shape: (395, 300, 123), y shape: (395, 3)
Subject 14: X shape: (401, 300, 123), y shape: (401, 3)
Subject 15: X shape: (388, 300, 123), y shape: (388, 3)
Subject 16: X shape: (392, 300, 123), y shape: (392, 3)
Subject 17: X shape: (388, 300, 123), y shape: (388, 3)
Subject 18: X shape: (396, 300, 12

In [52]:
# Test the saved npy file
# example
feature_path = ODD_feature_path   
label_path = ODD_label_path

print(f"Loading oddball trials:\n")
total_samples = 0
for feature_file, label_file in zip(os.listdir(feature_path), os.listdir(label_path)):
    sub_id = int(re.search(r'\d+', feature_file).group())
    feature_file_path = os.path.join(feature_path, feature_file)
    label_file_path = os.path.join(label_path, label_file)
    X = np.load(feature_file_path)
    y = np.load(label_file_path)
    print(f"Subject {sub_id}: X shape: {X.shape}, y shape: {y.shape}")
    if X.shape[0] != y.shape[0]:
        raise(f"Subject {sub_id} data and label length mismatch: " 
                f"{X.shape[0]} vs {y.shape[0]}")
    total_samples += np.load(feature_file_path).shape[0]
    sub_id += 1
print("\nTotal number of trials:", total_samples)

Loading oddball trials:

Subject 1: X shape: (660, 300, 123), y shape: (660, 3)
Subject 2: X shape: (660, 300, 123), y shape: (660, 3)
Subject 3: X shape: (660, 300, 123), y shape: (660, 3)
Subject 4: X shape: (660, 300, 123), y shape: (660, 3)
Subject 5: X shape: (660, 300, 123), y shape: (660, 3)
Subject 6: X shape: (660, 300, 123), y shape: (660, 3)
Subject 7: X shape: (660, 300, 123), y shape: (660, 3)
Subject 8: X shape: (660, 300, 123), y shape: (660, 3)
Subject 9: X shape: (660, 300, 123), y shape: (660, 3)
Subject 10: X shape: (660, 300, 123), y shape: (660, 3)
Subject 11: X shape: (660, 300, 123), y shape: (660, 3)
Subject 12: X shape: (660, 300, 123), y shape: (660, 3)
Subject 13: X shape: (660, 300, 123), y shape: (660, 3)
Subject 14: X shape: (660, 300, 123), y shape: (660, 3)
Subject 15: X shape: (660, 300, 123), y shape: (660, 3)
Subject 16: X shape: (660, 300, 123), y shape: (660, 3)
Subject 17: X shape: (660, 300, 123), y shape: (660, 3)
Subject 18: X shape: (660, 300, 