# Preprocess Motor Imagery Data

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mne
import seaborn as sns

# Load & Segment the Data

In [3]:
df = pd.read_csv('../data/MI/sidharth_motor_imagery_mov1=leftfootup_mov2=rightfootup_10-27-24.csv')

In [12]:
stim_labels, stim_idxs = np.unique(df['stim'], return_index=True)

# Preprocess

In [84]:
data = df.iloc[:, 1:-1].values

sfreq = 256
ch_names = list(df.keys())[1:-1]
ch_types = ['eeg'] * len(ch_names)

# Create MNE Info object
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)

# Create RawArray
raw = mne.io.RawArray(data.T, info)

Creating RawArray with float64 data, n_channels=8, n_times=292401
    Range : 0 ... 292400 =      0.000 ...  1142.188 secs
Ready.


In [85]:
# Band-pass filter between 1 and 40 Hz
raw.filter(l_freq=1.0, h_freq=40.0)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 845 samples (3.301 s)



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.1s finished


Unnamed: 0,General,General.1
,MNE object type,RawArray
,Measurement date,Unknown
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,00:19:03 (HH:MM:SS)
,Sampling frequency,256.00 Hz
,Time points,292401
,Channels,Channels
,EEG,8


# Segment Data

In [95]:
filtered_data = raw.get_data().T

In [97]:
segmented_data = []

for i in range(len(stim_idxs) - 1):
    if i == len(stim_idxs) - 2:
        segmented_data.append(filtered_data[stim_idxs[i]:])
    else:
        segmented_data.append(filtered_data[stim_idxs[i]:stim_idxs[i + 1]])

segmented_data = segmented_data[11:] # First 11 trials are to prepare the subject we can remove that data

In [102]:
X = []
y = []

for i, stim in enumerate(stim_labels[11:-1]):
    str_stim = str(int(stim))

    if str_stim[-2] == '5': 
        y.append(0 if str_stim[-1] == '1' else 1)
        X.append(segmented_data[i][:2500])

X = np.array(X).transpose(0, 2, 1)
y = np.array(y)

# Train the ML Model

In [104]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from mne.decoding import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline

In [105]:
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)

In [106]:
pipeline = make_pipeline(CSP(n_components=8), LDA()) # Creation of the pipeline only takes one line of code! (thanks to the hard work of developers creating these libraries)
pipeline.fit(train_X, train_y)

Computing rank from data with rank=None
    Using tolerance 40 (2.2e-16 eps * 8 dim * 2.3e+16  max singular value)
    Estimated rank (data): 8
    data: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating class=0 covariance using EMPIRICAL
Done.
Estimating class=1 covariance using EMPIRICAL
Done.


In [107]:
pipeline.score(test_X, test_y) # Performs better! Can we improve this even more?

0.25