In [16]:
import numpy as np
import mne
from mne.datasets import eegbci
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from mne.decoding import CSP

### 1. Setup and Configuration

This section defines the parameters for our analysis. We specify the subject ID and the corresponding experimental run numbers for the different motor imagery tasks as detailed on the PhysioNet dataset description page.

In [17]:
subject_id = 1
runs_lr = [4, 8, 12] # right or left hand fists data
runs_feet = [6, 10, 14] # feet or both fists data

## 2. Data Loading

Here, we use the `mne.datasets.eegbci.load_data` function to automatically download the required `.edf` files from the public PhysioNet repository. We are downloading two distinct sets of experiments: one for left vs. right hand imagery and another that includes feet imagery.

In [18]:
print("Downloading data...")
fnames_lr = eegbci.load_data(subject_id, runs = runs_lr, update_path = True)
print("Download complete.")

Downloading data...
Download complete.


In [19]:
print("Downloading data...")
fnames_feet = eegbci.load_data(subject_id, runs = runs_feet, update_path = True)
print("Download complete.")

Downloading data...
Download complete.


## 3. Reading and Combining Raw Data

The downloaded `.edf` files are loaded into MNE's core data structure, the `Raw` object. Since each task (e.g., left/right fist) consists of multiple recording sessions (runs), we concatenate them to create a single, continuous data stream for each task type.

In [20]:
raws_lr = [mne.io.read_raw_edf(f, preload = True) for f in fnames_lr]

Extracting EDF parameters from /Users/Mohammad/mne_data/MNE-eegbci-data/files/eegmmidb/1.0.0/S001/S001R04.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from /Users/Mohammad/mne_data/MNE-eegbci-data/files/eegmmidb/1.0.0/S001/S001R08.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from /Users/Mohammad/mne_data/MNE-eegbci-data/files/eegmmidb/1.0.0/S001/S001R12.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...


In [21]:
raws_feet = [mne.io.read_raw_edf(f, preload = True) for f in fnames_feet]

Extracting EDF parameters from /Users/Mohammad/mne_data/MNE-eegbci-data/files/eegmmidb/1.0.0/S001/S001R06.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from /Users/Mohammad/mne_data/MNE-eegbci-data/files/eegmmidb/1.0.0/S001/S001R10.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from /Users/Mohammad/mne_data/MNE-eegbci-data/files/eegmmidb/1.0.0/S001/S001R14.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...


In [22]:
#Concatenate the list of Raw objects for each task into a single, continuous Raw object.
raw_lr = mne.concatenate_raws(raws_lr)
raw_feet = mne.concatenate_raws(raws_feet)

## 4. Signal Preprocessing and Epoching

This is the most critical data cleaning and preparation stage.

### Signal Filtering
Raw EEG data is noisy. We apply two main filters:
1.  **Band-Pass Filter (8-35 Hz):** We isolate the frequency bands most associated with motor control and imagery, known as the *mu* (μ) and *beta* (β) rhythms. This removes slow signal drifts and high-frequency noise.
2.  **Notch Filter (50 Hz):** This specifically targets and removes electrical noise from the power grid, which is a common and powerful source of interference.

### Epoching
We slice the continuous signal into discrete time windows called **epochs** or **trials**. Each epoch is locked to a specific event (e.g., the cue to imagine moving the left fist). By creating these labeled trials, we transform the data into a format suitable for supervised machine learning. We use a time window from -1s to +4s to also capture the brain's preparatory activity before the cue.

In [23]:
def process_and_epoch(raw, event_id):
    raw.filter(l_freq=8., h_freq=35.)
    raw.notch_filter(freqs=50)
    
    # Extract events from annotations. 'T1' and 'T2' are markers in the data
    # corresponding to the start of different tasks.
    events, _ = mne.events_from_annotations(raw, event_id={'T1': 1, 'T2': 2})

    # starting 1 second before the cue to capture preparatory brain activity.
    epochs = mne.Epochs(raw, events, event_id, tmin=-1., tmax=4., preload=True, baseline=None, picks='eeg')
    return epochs

In [24]:
epochs_lr = process_and_epoch(raw_lr, event_id={'left_fist': 1, 'right_fist': 2})
epochs_feet = process_and_epoch(raw_feet, event_id={'feet': 2})

Filtering raw data in 3 contiguous segments
Setting up band-pass filter from 8 - 35 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 8.00
- Lower transition bandwidth: 2.00 Hz (-6 dB cutoff frequency: 7.00 Hz)
- Upper passband edge: 35.00 Hz
- Upper transition bandwidth: 8.75 Hz (-6 dB cutoff frequency: 39.38 Hz)
- Filter length: 265 samples (1.656 s)

Filtering raw data in 3 contiguous segments
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 49.12 Hz)
- Upper passband edg

In [25]:
# Combine all processed epochs from all tasks into a single object for the classifier.
epochs = mne.concatenate_epochs([epochs_lr, epochs_feet])

Not setting metadata
69 matching events found
No baseline correction applied


  epochs = mne.concatenate_epochs([epochs_lr, epochs_feet])


## 5. Feature and Label Preparation

We extract the final processed data and corresponding labels from the `Epochs` object. This prepares them for direct use with scikit-learn's machine learning models. The data is now a 3D NumPy array (`trials x channels x timepoints`), and the labels are a 1D vector.

In [26]:
data = epochs.get_data() # The data is a 3D numpy array: (n_epochs, n_channels, n_times).
labels = epochs.events[:, -1]

## 5. Hyperparameter Tuning with GridSearchCV

Now we search for the optimal model settings. The goal of **hyperparameter tuning** is to find the combination of model parameters that yields the highest performance.

We use `GridSearchCV` from scikit-learn, which performs an exhaustive search over a specified parameter grid. It trains and evaluates a model for every possible combination using cross-validation and reports which combination was the best.

We will test:
* Different numbers of **CSP components**.
* Two different classifiers: **LDA** and **SVM**.
* For the SVM, different `kernel` types (`linear`, `rbf`), `C` values (regularization strength), and `gamma` values (for the `rbf` kernel).


In [27]:
# Pipeline with a placeholder for the classifier
pipeline = Pipeline([
    ('CSP', CSP(reg=None, log=True)),
    ('Classifier', LDA())
])

In [28]:
# Define the expanded parameters to test
param_grid = [
    {
        'CSP__n_components': [4, 6, 8, 10], # Expanded the range for CSP
        'Classifier': [LDA()]
    },
    {
        'CSP__n_components': [4, 6, 8, 10],
        'Classifier': [SVC(kernel='linear')],
        'Classifier__C': [0.1, 1, 10, 100] # Expanded the range for C
    },
    {
        'CSP__n_components': [4, 6, 8, 10],
        'Classifier': [SVC(kernel='rbf')],
        'Classifier__C': [0.1, 1, 10, 100],
        'Classifier__gamma': ['scale', 'auto', 0.1, 1] # ADDED GAMMA: Very important for RBF
    }
]



In [29]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# n_jobs=-1 uses all available CPU cores to speed up the process.
grid_search = GridSearchCV(pipeline, param_grid, cv=cv, n_jobs=-1, verbose=1)
grid_search.fit(data, labels)

Fitting 5 folds for each of 84 candidates, totalling 420 fits
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
    Using tolerance 0.00038 (2.2e-16 eps * 64 dim * 2.7e+10  max singular value)
    Estimated rank (data): 64
    data: rank 64 computed from 64 data channels with 0 projectors
    Using tolerance 0.00038 (2.2e-16 eps * 64 dim * 2.7e+10  max singular value)
    Using tolerance 0.00037 (2.2e-16 eps * 64 dim * 2.6e+10  max singular value)
    Using tolerance 0.00037 (2.2e-16 eps * 64 dim * 2.6e+10  max singular value)
    Estimated rank (data): 64
    data: rank 64 computed from 64 data channels with 0 projectors
    Estimated rank (



Estimating class=2 covariance using EMPIRICAL
Done.
Estimating class=2 covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
Computing rank from data with rank=None
Estimating class=2 covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
Computing rank from data with rank=None
Computing rank from data with rank=None
    Using tolerance 0.00038 (2.2e-16 eps * 64 dim * 2.7e+10  max singular value)
    Estimated rank (data): 64
    data: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating class=1 covariance using EMPIRICAL
Done.
Estimating class=2 covariance using EMPIRICAL
Done.
    Using tolerance 0.00038 (2.2e-16 eps * 64 dim * 2.7e+10  max singular value)
    Estimated rank (data): 64
    data: rank 64 computed from 64 data channels with 0 projectors
    Using tolerance 0.00038 (2.2e-16 eps * 64 dim * 2.7e+10  max singular value)
Reducing data rank from 64 -> 64
Estimating class=1 covariance using E

0,1,2
,estimator,Pipeline(step...tAnalysis())])
,param_grid,"[{'CSP__n_components': [4, 6, ...], 'Classifier': [LinearDiscriminantAnalysis()]}, {'CSP__n_components': [4, 6, ...], 'Classifier': [SVC(kernel='linear')], 'Classifier__C': [0.1, 1, ...]}, ...]"
,scoring,
,n_jobs,-1
,refit,True
,cv,StratifiedKFo... shuffle=True)
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_components,8
,reg,
,log,True
,cov_est,'concat'
,transform_into,'average_power'
,norm_trace,False
,cov_method_params,
,rank,
,component_order,'mutual_info'

0,1,2
,C,10
,kernel,'rbf'
,degree,3
,gamma,0.1
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [30]:
print("\n--- Grid Search Results ---")
print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.4f}")


--- Grid Search Results ---
Best parameters found: {'CSP__n_components': 8, 'Classifier': SVC(), 'Classifier__C': 10, 'Classifier__gamma': 0.1}
Best cross-validation score: 0.6967
