In [1]:
import numpy as np
import scipy as sp
from pathlib import Path

# Data loading

In [2]:
# Load all .mat files and extract relevant data
path = Path('.').glob('*.mat')
all_subjects = [sp.io.loadmat(f, squeeze_me=True)['subjectData'] for f in path]
subject_names = [s['subjectId'].item() for s in all_subjects]
columns = all_subjects[0].dtype.names
print("Columns: ", columns)
print("Subject Names: ", subject_names)
print("Loaded data for", len(all_subjects), "subjects.")

Columns:  ('INIT_DELAY', 'MI_DURATION', 'subjectId', 'fs', 'trialsData', 'trialsLabels')
Subject Names:  ['PAT021_A', 'PATID15', 'PATID16', 'PATID26', 'PAT013', 'PAT015']
Loaded data for 6 subjects.


In [3]:
# Assert that certain fields are consistent across all subjects in order to discard them
for field in ['INIT_DELAY', 'MI_DURATION', 'fs']:
    assert all(s[field] == all_subjects[0][field] for s in all_subjects)

fs = all_subjects[0]['fs']
init_delay = all_subjects[0]['INIT_DELAY']
mi_duration = all_subjects[0]['MI_DURATION']
print("Common parameters across all subjects:")
print("Sampling frequency: "+ str(fs)+" Hz")
print("Initial delay: "+ str(init_delay)+" s")
print("Motor imagery duration: "+ str(mi_duration)+" s")

Common parameters across all subjects:
Sampling frequency: 256 Hz
Initial delay: 0 s
Motor imagery duration: 6 s


In [None]:
# Extract EEG data and labels for all subjects
X = []  # (subject, trials, timepoints, channels)
y = []  # (subject, trials)

for i, subject in enumerate(all_subjects):
    # Unwrap scalar-wrapped arrays (common EEG data loading artifact)
    trials = np.atleast_1d(subject['trialsData'])[0]  # Shape: (n_trials,)
    trials_labels = np.atleast_1d(subject['trialsLabels'])[0]  # Shape: (n_trials,)
    
    subject_X = np.array([trial for trial in trials])  # Shape: (n_trials, n_timepoints, n_channels)

    X.append(subject_X)
    y.append(trials_labels)

print("Extracted data shapes for each subject:")
for i, (name, subj_X) in enumerate(zip(subject_names, X)):
    n_trials, n_t, n_ch = subj_X.shape
    print(f"Subject {i+1} ({name}): {n_trials} trials, {n_t} timepoints, {n_ch} channels")

Extracted data shapes for each subject:
Subject 1 (PAT021_A): 1236 trials, 1536 timepoints, 16 channels
Subject 2 (PATID15): 1960 trials, 1536 timepoints, 16 channels
Subject 3 (PATID16): 434 trials, 1536 timepoints, 16 channels
Subject 4 (PATID26): 1464 trials, 1536 timepoints, 16 channels
Subject 5 (PAT013): 494 trials, 1536 timepoints, 16 channels
Subject 6 (PAT015): 1126 trials, 1536 timepoints, 16 channels


# Save to Pickle file

In [5]:
import pickle 

with open('extract.p', 'wb') as f:
    pickle.dump((X, y), f)