In [1]:
from sklearn.base import BaseEstimator
from mne.time_frequency import psd_array_welch, psd_welch
import numpy as np
import mne

class ConcatenateChannelsPSD(BaseEstimator):
    """Stateless pipeline step that flattens every sample into a single row.

    ``transform`` keeps the first axis (one entry per sample) and collapses
    all remaining axes, so the result is always two-dimensional.
    """

    def __init__(self):
        # Zero-argument super() reaches the parent initializer. The previous
        # ``super(ConcatenateChannelsPSD).__init__()`` only re-initialised an
        # unbound ``super`` proxy and never touched the parent class.
        super().__init__()

    def fit(self, x, y=None):
        """Nothing is learned; return ``self`` so calls can be chained."""
        return self

    def transform(self, x, y=None):
        """Return ``x`` reshaped to ``(len(x), -1)``.

        ``x`` must support ``len`` and ``reshape`` (e.g. a NumPy array);
        ``y`` is ignored.
        """
        n = len(x)
        return x.reshape(n, -1)


class GetEpochsData(BaseEstimator):
    """Stateless pipeline step that replaces an object by its ``get_data()`` result."""

    def __init__(self):
        # Zero-argument super() reaches the parent initializer. The previous
        # ``super(GetEpochsData).__init__()`` only re-initialised an unbound
        # ``super`` proxy and never touched the parent class.
        super().__init__()

    def fit(self, x, y=None):
        """Nothing is learned; return ``self`` so calls can be chained."""
        return self

    def transform(self, x, y=None):
        """Return ``x.get_data()``; ``y`` is ignored."""
        return x.get_data()


class PSD(BaseEstimator):
    """Pipeline step that turns epoched signals into per-band spectral power.

    The keyword arguments given to the constructor are stored as-is and
    forwarded to the Welch estimator on every ``transform`` call. The
    resulting spectrum is summed inside each band of ``BANDS_DICT``.
    """

    # Band name -> (low, high) edges. When bins are selected in ``transform``
    # the low edge is inclusive and the high edge exclusive.
    BANDS_DICT = {
        # "delta": (1, 4),
        # "theta": (4, 8),
        "mu": (8, 13),
        "beta": (13, 25),
        # "gamma": (25, 40),
    }

    # Keyword names ``set_params`` accepts: the original whitelist plus the
    # extra keywords this notebook passes to the constructor.
    _ALLOWED_PARAMS = (
        "picks", "n_fft", "n_overlap", "n_per_seg",
        "sfreq", "average", "window",
    )

    def __init__(self, **kwargs):
        super().__init__()
        self.kwargs = kwargs

    def set_params(self, **params):
        """Update the stored Welch keyword arguments and return ``self``.

        Raises:
            ValueError: if a name is not listed in ``_ALLOWED_PARAMS``.
        """
        unknown = sorted(set(params) - set(self._ALLOWED_PARAMS))
        if unknown:
            raise ValueError(
                "Unsupported PSD parameter(s) {}; allowed: {}".format(
                    unknown, list(self._ALLOWED_PARAMS)
                )
            )
        self.kwargs.update(params)
        return self

    def get_params(self, *args, **kwargs):
        """Return a shallow copy of the stored keyword arguments.

        A copy is returned so that callers which modify the result do not
        silently rewrite this estimator's configuration.
        """
        return dict(self.kwargs)

    def fit(self, x, y=None):
        """Nothing is learned; return ``self`` so calls can be chained."""
        return self

    def transform(self, x, y=None):
        """Compute band power for ``x``.

        ``x`` may be a list of epochs objects (concatenated first), an
        ``mne.Epochs`` instance, or a NumPy array. The fitted frequency axis
        is stored in ``self.freqs``.

        Returns:
            Array whose last axis has one entry per band of ``BANDS_DICT``
            (the two leading axes of the spectrum are kept).

        Raises:
            TypeError: if ``x`` is none of the supported input types.
        """
        psds = freqs = None
        if isinstance(x, list):
            x = mne.concatenate_epochs(x)
            psds, freqs = psd_welch(x, **self.kwargs)
        # NOTE(review): these checks are sequential on purpose. If the
        # concatenated object is itself an ``mne.Epochs`` instance, the
        # spectrum above is recomputed from the raw array below.
        if isinstance(x, mne.Epochs):
            x = x.get_data()
        if isinstance(x, np.ndarray):
            psds, freqs = psd_array_welch(x, **self.kwargs)
        if psds is None:
            # Previously this fell through to an unbound-variable error.
            raise TypeError(
                "PSD.transform expects a list of epochs, mne.Epochs or "
                "numpy.ndarray, got {}".format(type(x).__name__)
            )
        if ("average" in self.kwargs) and (self.kwargs["average"] is None):
            # Without averaging the spectrum carries a fourth axis
            # (presumably one entry per Welch segment); sum it away.
            psds = psds.sum(axis=3)
        self.freqs = freqs

        # One slab per band, summed over the frequency bins inside the band.
        band_spectras = [
            psds[:, :, (freqs >= lfreq) & (freqs < hfreq)].sum(axis=2, keepdims=True)
            for lfreq, hfreq in self.BANDS_DICT.values()
        ]
        return np.concatenate(band_spectras, axis=2)

# PSD + LDA

In [None]:
from mne.decoding import CSP
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import balanced_accuracy_score
from ica_benchmark.io.load import OpenBMI_Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import pandas as pd

# Settings shared by every subject in this cell.
tmin = 1
tmax = 3.5
# 18 channel names: the FC, C and CP rows, each with suffixes 5, 3, 1, 2, 4, 6.
channels = ["FC" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["C" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["CP" + str(s) for s in [5, 3, 1, 2, 4, 6]]

# Welch segment length handed to PSD below. It never changes, so it is set
# once here instead of on every loop iteration.
len_size = 1000

# One [subject id, train accuracy, test accuracy] row per subject.
results = list()

for i in range(1, 55):
    train_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_train.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )
    test_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_test.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )

    train_epochs.load_data()
    test_epochs.load_data()

    # Keep the selected channels only, then filter with the 8 / 30 cut-offs.
    # (The unused ``iir_params`` dict that used to be rebuilt here was dead
    # code and has been removed.)
    train_epochs = train_epochs.pick(channels).filter(8, 30)
    test_epochs = test_epochs.pick(channels).filter(8, 30)

    x_train = train_epochs.get_data()
    x_test = test_epochs.get_data()
    # Labels are read from the third column of the events array.
    y_train = train_epochs.events[:, 2]
    y_test = test_epochs.events[:, 2]

    # Band power -> flatten to one row per epoch -> standardise -> LDA.
    lr = make_pipeline(
        PSD(
            sfreq=train_epochs.info["sfreq"],
            n_fft=1 * len_size,
            n_overlap=len_size // 4,
            n_per_seg=1 * len_size,
            average="mean",
            window="hamming",
        ),
        ConcatenateChannelsPSD(),
        StandardScaler(),
        LinearDiscriminantAnalysis(n_components=1)
    ).fit(x_train, y_train)
    train_pred = lr.predict(x_train)
    test_pred = lr.predict(x_test)

    train_acc = balanced_accuracy_score(y_train, train_pred)
    test_acc = balanced_accuracy_score(y_test, test_pred)

    print(train_acc, test_acc)

    results.append([i, train_acc, test_acc])

    # Drop the references to the loaded epochs before the next subject.
    del train_epochs, test_epochs

In [None]:
# Summary statistics of the per-subject rows currently held in `results`
# (filled by whichever loop cell ran last).
pd.DataFrame(results, columns=["uid", "train_acc", "test_acc"]).describe()

# CSP + LDA

In [None]:
from mne.decoding import CSP
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import balanced_accuracy_score
from ica_benchmark.io.load import OpenBMI_Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import pandas as pd

# Settings shared by every subject in this cell.
tmin = 1
tmax = 3.5
# 18 channel names: the FC, C and CP rows, each with suffixes 5, 3, 1, 2, 4, 6.
channels = ["FC" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["C" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["CP" + str(s) for s in [5, 3, 1, 2, 4, 6]]

# One [subject id, train accuracy, test accuracy] row per subject.
results = list()

for i in range(1, 55):
    train_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_train.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )
    test_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_test.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )

    train_epochs.load_data()
    test_epochs.load_data()

    # Keep the selected channels only, then filter with the 8 / 30 cut-offs.
    # (The unused ``iir_params`` dict that used to be rebuilt here was dead
    # code and has been removed.)
    train_epochs = train_epochs.pick(channels).filter(8, 30)
    test_epochs = test_epochs.pick(channels).filter(8, 30)

    x_train = train_epochs.get_data()
    x_test = test_epochs.get_data()
    # Labels are read from the third column of the events array.
    y_train = train_epochs.events[:, 2]
    y_test = test_epochs.events[:, 2]

    # CSP is fitted on the training epochs only, then applied to both sets.
    csp = CSP(n_components=len(channels), log=True)
    csp.fit(x_train, y_train)

    x_test = csp.transform(x_test)
    x_train = csp.transform(x_train)

    lr = LinearDiscriminantAnalysis().fit(x_train, y_train)

    train_pred = lr.predict(x_train)
    test_pred = lr.predict(x_test)

    train_acc = balanced_accuracy_score(y_train, train_pred)
    test_acc = balanced_accuracy_score(y_test, test_pred)

    print(train_acc, test_acc)

    results.append([i, train_acc, test_acc])

    # Drop the references to the loaded epochs before the next subject.
    del train_epochs, test_epochs

In [None]:
# Tabulate the rows currently held in `results`: one row per subject id with
# its train and test balanced accuracy.
results_df = pd.DataFrame(results, columns=["uid", "train_acc", "test_acc"])

In [None]:
# Summary statistics of every column of `results_df`.
results_df.describe()

In [None]:
import matplotlib.pyplot as plt

# Histogram of the test accuracy expressed in percent, with bin edges every
# 5 points from 30 to 100.
(results_df.test_acc * 100).hist(bins=(range(30, 101, 5)))
plt.show()

In [None]:
txt = """uid Se1 Se2 Se1 Se2 Se1 Se2 Se1 Se2 Se1 Se2 Se1 Se2 Se1 Se2 Se1 Se2
s1E 71.3 77.2 61.0 83.0 66.0 78.0 84.0 84.0 80.0 90.0 69.4 88.9 11.8 17.9 97.5 91.0
s2 95.3 91.2 96.0 86.0 100 97.0 100 99.0 100 96.0 100 100 22.6 22.6 99.5 98.5
s3 94.7 98.3 95.0 94.0 94.0 95.0 93.0 94.0 93.0 95.0 100 100 22.6 22.6 87.5 98.0
s4M 51.4 56.5 53.0 57.0 52.0 61.0 48.0 53.0 45.0 66.0 91.7 94.4 18.9 20.0 100 100
s5E 93.3 78.8 94.0 81.0 95.0 82.0 93.0 84.0 96.0 84.0 91.7 72.2 18.9 12.6 81.5 98.5
s6 72.5 78.3 77.0 88.0 77.0 85.0 76.0 89.0 52.0 89.0 100 100 22.6 22.6 99.5 100
s7M 60.3 72.7 49.0 71.0 59.0 64.0 54.0 71.0 53.0 80.0 100 100 22.6 22.6 100 99.5
s8MS 58.6 65.1 57.0 66.0 62.0 68.0 55.0 84.0 60.0 55.0 94.4 97.2 20.0 21.1 84.0 77.0
s9 82.1 76.4 86.0 71.0 90.0 70.0 73.0 70.0 89.0 69.0 100 100 22.6 22.6 92.0 98.0
s10M 61.8 68.8 65.0 61.0 64.0 65.0 45.0 54.0 43.0 52.0 100 100 22.6 22.6 96.0 97.0
s11M 54.7 53.6 47.0 50.0 50.0 50.0 49.0 48.0 51.0 50.0 100 100 22.6 22.6 100 100
s12M 56.1 58.1 46.0 58.0 48.0 58.0 56.0 50.0 54.0 54.0 100 97.2 22.6 21.1 100 97.0
s13M 70.0 60.5 56.0 54.0 57.0 54.0 50.0 54.0 50.0 59.0 100 100 22.6 22.6 98.5 94.0
s14M 60.9 58.4 58.0 48.0 65.0 55.0 68.0 53.0 69.0 51.0 100 100 22.6 22.6 99.5 93.0
s15M 57.9 65.1 55.0 57.0 57.0 58.0 56.0 60.0 53.0 69.0 100 100 22.6 22.6 99.5 99.5
s16M 63.8 60.5 53.0 69.0 54.0 56.0 45.0 63.0 53.0 63.0 100 100 22.6 22.6 100.0 100
s17M 80.1 75.1 83.0 42.0 90.0 45.0 88.0 54.0 81.0 55.0 94.4 91.7 20.0 18.9 98.0 98.0
s18 82.4 90.8 92.0 82.0 93.0 95.0 91.0 93.0 91.0 88.0 100 100 22.6 22.6 98.5 100
s19 83.4 83.5 82.0 89.0 85.0 83.0 89.0 89.0 83.0 82.0 100 100 22.6 22.6 98.5 98.5
s20M 51.6 76.7 59.0 73.0 53.0 79.0 50.0 82.0 52.0 62.0 100 100 22.6 22.6 99.0 95.0
s21 97.8 99.5 98.0 100 99.0 100 98.0 100 98.0 100 100 100 22.6 22.6 98.5 100
s22 86.2 78.3 77.0 85.0 91.0 92.0 92.0 65.0 94.0 90.0 94.4 88.9 20.0 17.9 94.5 90.0
s23MES 63.1 78.0 54.0 68.0 51.0 57.0 55.0 55.0 58.0 53.0 86.1 80.6 16.9 15.1 53.0 44.5
s24M 54.9 57.6 49.0 54.0 48.0 66.0 50.0 45.0 51.0 51.0 100 94.4 22.6 20.0 99.0 98.5
s25M 51.7 51.2 54.0 57.0 52.0 59.0 61.0 70.0 59.0 86.0 100 88.9 22.6 17.9 100.0 95.5
s26M 59.2 46.4 49.0 44.0 58.0 44.0 52.0 48.0 45.0 48.0 86.1 94.4 16.9 20.0 98.0 99.5
s27M 52.9 62.7 56.0 70.0 55.0 62.0 47.0 55.0 44.0 51.0 100 100 22.6 22.6 99.5 99.5
s28 92.3 91.3 94.0 97.0 99.0 99.0 98.0 98.0 100 99.0 100 97.2 22.6 21.1 93.0 97.5
s29 85.5 98.0 99.0 98.0 99.0 98.0 99.0 99.0 98.0 98.0 97.2 100 21.1 22.6 95.0 89.0
s30 75.1 64.1 76.0 66.0 83.0 65.0 82.0 57.0 84.0 55.0 86.1 94.4 16.9 20.0 100 100
s31M 67.5 63.6 58.0 57.0 67.0 57.0 77.0 58.0 51.0 58.0 100 100 22.6 22.6 100 100
s32 77.3 96.1 56.0 97.0 53.0 99.0 53.0 98.0 57.0 99.0 100 100 22.6 22.6 97.5 97.0
s33 98.1 91.0 99.0 89.0 99.0 92.0 99.0 100 99.0 100 100 97.2 22.6 21.1 92.5 91.5
s34MS 53.0 50.1 48.0 47.0 44.0 45.0 46.0 49.0 48.0 55.0 91.7 97.2 18.9 21.1 84.0 93.5
s35M 52.6 66.1 52.0 52.0 55.0 54.0 55.0 61.0 54.0 58.0 100 97.2 22.6 21.1 100 98.5
s36 96.9 98.4 97.0 94.0 99.0 94.0 98.0 98.0 98.0 100 100 91.7 22.6 18.9 100 100
s37 95.4 97.3 93.0 81.0 95.0 95.0 97.0 93.0 97.0 93.0 80.6 100 15.1 22.6 98.0 99.5
s38M 55.2 63.1 56.0 52.0 59.0 53.0 51.0 57.0 53.0 52.0 97.2 100 21.1 22.6 99.5 97.5
s39M 88.0 61.9 64.0 52.0 79.0 49.0 90.0 61.0 86.0 81.0 86.1 94.4 16.9 20.0 98.5 97.5
s40M 49.7 61.8 46.0 58.0 57.0 56.0 44.0 62.0 47.0 64.0 94.4 100 20.0 22.6 87.0 100
s41M 52.9 52.4 62.0 48.0 57.0 42.0 62.0 51.0 65.0 54.0 100 100 22.6 22.6 100 98.0
s42M 53.4 69.2 47.0 63.0 48.0 75.0 58.0 73.0 51.0 77.0 100 97.2 22.6 21.1 96.5 77.5
s43 86.5 81.0 77.0 86.0 90.0 90.0 87.0 89.0 91.0 95.0 100 100 22.6 22.6 99.0 100
s44 96.0 98.5 99.0 100 100 100 100 99.0 100 99.0 100 100 22.6 22.6 99.5 100
s45 92.5 95.0 93.0 99.0 94.0 99.0 95.0 98.0 93.0 100 91.7 97.2 18.9 21.1 96.5 99.5
s46M 52.7 75.7 53.0 58.0 53.0 62.0 53.0 83.0 42.0 78.0 100 94.4 22.6 20.0 92.0 93.0
s47MS 45.2 77.3 44.0 59.0 51.0 59.0 53.0 69.0 52.0 63.0 100 100 22.6 22.6 37.5 93.5
s48M 64.2 52.7 50.0 49.0 51.0 59.0 52.0 52.0 54.0 56.0 100 100 22.6 22.6 99.0 99.5
s49M 69.6 68.8 63.0 62.0 70.0 59.0 54.0 60.0 57.0 52.0 97.2 100 21.1 22.6 100 100
s50M 61.7 60.0 59.0 58.0 59.0 55.0 58.0 48.0 58.0 50.0 91.7 100 18.9 22.6 100 97.0
s51M 68.3 58.9 71.0 52.0 65.0 48.0 59.0 52.0 62.0 49.0 91.7 88.9 18.9 17.9 94.5 90.0
s52 72.6 78.7 72.0 72.0 69.0 77.0 74.0 72.0 75.0 54.0 100 100 22.6 22.6 98.5 95.0
s53M 60.0 62.8 50.0 54.0 49.0 57.0 52.0 54.0 49.0 54.0 100 100 22.6 22.6 100 99.0
s54M 58.2 49.0 53.0 45.0 52.0 47.0 53.0 55.0 51.0 54.0 100 100 22.6 22.6 95.0 93.0
mean 70.1 72.2 67.3 68.6 69.6 69.7 68.8 70.6 67.9 71.0 96.6 97.0 21.1 21.3 94.9 95.5
std 16.2 15.4 18.3 17.6 19.0 18.5 19.8 18.6 20.3 18.8 6.2 5.4 2.4 2.1 10.9 8.6
"""

# Turn the space-separated table held in `txt` into comma-separated text and
# store it as openbmi.csv in the current working directory.
csv_text = ",".join(txt.split(" "))
with open("openbmi.csv", "w") as csv_file:
    csv_file.write(csv_text)

In [None]:
import matplotlib.pyplot as plt
# Histogram of one column of the reference table stored in openbmi.csv, using
# bin edges every 5 points from 30 to 100.
# NOTE(review): "Se1.1" is presumably the name pandas assigns to the second of
# the repeated "Se1" header entries — confirm this is the intended column.
pd.read_csv("openbmi.csv")["Se1.1"].hist(bins=(range(30, 101, 5)))
plt.show()

# ICA + CSP + LDA

In [None]:
from mne.decoding import CSP
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import balanced_accuracy_score
from ica_benchmark.io.load import OpenBMI_Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import pandas as pd
from ica_benchmark.processing.ica import get_ica_instance

# Settings shared by every subject in this cell.
tmin = 1
tmax = 3.5
# 18 channel names: the FC, C and CP rows, each with suffixes 5, 3, 1, 2, 4, 6.
channels = ["FC" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["C" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["CP" + str(s) for s in [5, 3, 1, 2, 4, 6]]

# One [subject id, train accuracy, test accuracy] row per subject.
results = list()

for i in range(1, 55):
    print(i)
    train_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_train.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )
    test_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_test.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )

    train_epochs.load_data()
    test_epochs.load_data()

    # Keep the selected channels only, then filter with the 8 / 30 cut-offs.
    # (The unused ``iir_params`` dict that used to be rebuilt here was dead
    # code and has been removed.)
    train_epochs = train_epochs.pick(channels).filter(8, 30)
    test_epochs = test_epochs.pick(channels).filter(8, 30)

    # The ICA is fitted on the training epochs only and then applied to both
    # sets, so no test data is used while fitting.
    ica = get_ica_instance("ext_infomax")
    ica.fit(train_epochs)

    train_epochs = ica.transform(train_epochs)
    test_epochs = ica.transform(test_epochs)

    x_train = train_epochs.get_data()
    x_test = test_epochs.get_data()
    # Labels are read from the third column of the events array.
    y_train = train_epochs.events[:, 2]
    y_test = test_epochs.events[:, 2]

    # CSP is likewise fitted on the training data only.
    csp = CSP(n_components=len(channels), log=True)
    csp.fit(x_train, y_train)

    x_test = csp.transform(x_test)
    x_train = csp.transform(x_train)

    # A disabled alternative previously kept here as commented-out code:
    # StandardScaler -> SequentialFeatureSelector(LDA(n_components=1),
    # n_features_to_select=10) -> LDA(n_components=1).
    lr = LinearDiscriminantAnalysis().fit(x_train, y_train)

    train_pred = lr.predict(x_train)
    test_pred = lr.predict(x_test)

    train_acc = balanced_accuracy_score(y_train, train_pred)
    test_acc = balanced_accuracy_score(y_test, test_pred)

    print(train_acc, test_acc)

    results.append([i, train_acc, test_acc])

    # Drop the references to the loaded epochs before the next subject.
    del train_epochs, test_epochs

In [None]:
# Tabulate the rows currently held in `results` (one per subject id) and show
# summary statistics of every column.
results_df = pd.DataFrame(results, columns=["uid", "train_acc", "test_acc"])
results_df.describe()

# Training: session 1 (train + test files)
# Testing: session 2 (train + test files)
# CSP + LDA


In [None]:
from mne.decoding import CSP
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import balanced_accuracy_score
from ica_benchmark.io.load import OpenBMI_Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import pandas as pd
from mne import concatenate_epochs

# Settings shared by every subject in this cell.
tmin = 1
tmax = 3.5
# 18 channel names: the FC, C and CP rows, each with suffixes 5, 3, 1, 2, 4, 6.
channels = ["FC" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["C" + str(s) for s in [5, 3, 1, 2, 4, 6]]
channels += ["CP" + str(s) for s in [5, 3, 1, 2, 4, 6]]

# One [subject id, train accuracy, test accuracy] row per subject.
results = list()

for i in range(1, 55):
    session_1_1_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_train.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )
    session_1_2_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session1/{i}_test.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )
    session_2_1_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session2/{i}_train.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )
    session_2_2_epochs, _ = OpenBMI_Dataset.load_from_filepath(
        f"/home/paulo/Documents/datasets/OpenBMI/edf/session2/{i}_test.edf",
        tmin=tmin,
        tmax=tmax,
        reject=False
    )

    # Both session-1 files form the training set and both session-2 files the
    # test set.
    train_epochs = concatenate_epochs([session_1_1_epochs, session_1_2_epochs])
    test_epochs = concatenate_epochs([session_2_1_epochs, session_2_2_epochs])

    train_epochs.load_data()
    test_epochs.load_data()

    # Keep the selected channels only, then filter with the 8 / 30 cut-offs.
    # (The unused ``iir_params`` dict that used to be rebuilt here was dead
    # code and has been removed.)
    train_epochs = train_epochs.pick(channels).filter(8, 30)
    test_epochs = test_epochs.pick(channels).filter(8, 30)

    x_train = train_epochs.get_data()
    x_test = test_epochs.get_data()
    # Labels are read from the third column of the events array.
    y_train = train_epochs.events[:, 2]
    y_test = test_epochs.events[:, 2]

    # CSP is fitted on the training epochs only, then applied to both sets.
    csp = CSP(n_components=len(channels), log=True)
    csp.fit(x_train, y_train)

    x_test = csp.transform(x_test)
    x_train = csp.transform(x_train)

    lr = LinearDiscriminantAnalysis().fit(x_train, y_train)

    train_pred = lr.predict(x_train)
    test_pred = lr.predict(x_test)

    train_acc = balanced_accuracy_score(y_train, train_pred)
    test_acc = balanced_accuracy_score(y_test, test_pred)

    print(train_acc, test_acc)

    results.append([i, train_acc, test_acc])

    # Drop the references to the concatenated epochs before the next subject.
    del train_epochs, test_epochs

In [None]:
# Summary statistics of the per-subject rows currently held in `results`
# (filled by whichever loop cell ran last).
pd.DataFrame(results, columns=["uid", "train_acc", "test_acc"]).describe()

In [2]:
from mne.decoding import CSP
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import balanced_accuracy_score
from ica_benchmark.io.load import OpenBMI_Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from ica_benchmark.processing.ica import get_ica_instance
import pandas as pd
from pathlib import Path

TMIN = 1
TMAX = 3.5
DEFAULT_CHANNELS = ["FC" + str(s) for s in [5, 3, 1, 2, 4, 6]]
DEFAULT_CHANNELS += ["C" + str(s) for s in [5, 3, 1, 2, 4, 6]]
DEFAULT_CHANNELS += ["CP" + str(s) for s in [5, 3, 1, 2, 4, 6]]

DATASET_PATH = Path("/home/paulo/Documents/datasets/OpenBMI/edf/")

DEFAULT_CLF = make_pipeline(
    CSP(n_components=len(DEFAULT_CHANNELS), log=True),
    StandardScaler(),
    LinearDiscriminantAnalysis(),
)

def _load_filtered_epochs(path, tmin, tmax, channels, highpass, lowpass):
    """Load one recording, keep ``channels`` and filter it with the two cut-offs."""
    epochs, _ = OpenBMI_Dataset.load_from_filepath(
        path,
        tmin=tmin,
        tmax=tmax,
        reject=False
    )
    return epochs.load_data().pick(channels).filter(highpass, lowpass)


def experiment(clf, session=1, tmin=TMIN, tmax=TMAX, channels=None, lowpass=30, highpass=8, get_data=True, subjects=None):
    """Fit and score ``clf`` separately on every subject of one session.

    For each subject the ``{subject}_train.edf`` file is used for fitting and
    the ``{subject}_test.edf`` file for evaluation. Both are read from
    ``DATASET_PATH / f"session{session}"``.

    Args:
        clf: object with ``fit(x, y)`` and ``predict(x)``. The same instance
            is re-fitted for every subject.
        session: session number used to build the directory name.
        tmin, tmax: epoch window forwarded to the dataset loader.
        channels: channel names to keep; ``DEFAULT_CHANNELS`` when falsy.
        lowpass, highpass: cut-offs forwarded to the epochs' ``filter`` method
            as ``filter(highpass, lowpass)``.
        get_data: when true ``clf`` receives the arrays from ``get_data()``,
            otherwise it receives the epochs objects themselves.
        subjects: iterable of subject ids; defaults to ``range(1, 55)``.

    Returns:
        DataFrame with one row per subject and the columns ``uid``,
        ``train_acc`` and ``test_acc`` (balanced accuracies).
    """
    channels = channels or DEFAULT_CHANNELS
    if subjects is None:
        subjects = range(1, 55)

    session_dir = DATASET_PATH / f"session{session}"
    results = list()

    for i in subjects:
        train_epochs = _load_filtered_epochs(
            session_dir / f"{i}_train.edf", tmin, tmax, channels, highpass, lowpass
        )
        test_epochs = _load_filtered_epochs(
            session_dir / f"{i}_test.edf", tmin, tmax, channels, highpass, lowpass
        )

        if get_data:
            x_train, x_test = train_epochs.get_data(), test_epochs.get_data()
        else:
            x_train, x_test = train_epochs, test_epochs
        # Labels are read from the third column of the events array.
        y_train, y_test = train_epochs.events[:, 2], test_epochs.events[:, 2]

        clf.fit(x_train, y_train)

        train_acc = balanced_accuracy_score(y_train, clf.predict(x_train))
        test_acc = balanced_accuracy_score(y_test, clf.predict(x_test))

        results.append([i, train_acc, test_acc])

    return pd.DataFrame(results, columns=["uid", "train_acc", "test_acc"])

In [None]:
# Run the default CSP -> StandardScaler -> LDA pipeline with every default
# argument of `experiment`, then show summary statistics.
results_df = experiment(DEFAULT_CLF)
results_df.describe()

In [4]:
# Pipeline under investigation: ICA -> raw data extraction -> CSP -> scaling
# -> LDA. It is only constructed here, not fitted.
clf = make_pipeline(
    get_ica_instance("ext_infomax"),
    GetEpochsData(),
    CSP(n_components=len(DEFAULT_CHANNELS), log=True),
    StandardScaler(),
    LinearDiscriminantAnalysis(),
)
# Load the training recording of subject 1, session 1, to step through the
# pipeline stages by hand in the following cells.
train_epochs, _ = OpenBMI_Dataset.load_from_filepath(
    DATASET_PATH / f"session1/1_train.edf",
    tmin=1,
    tmax=3.5,
    reject=False
)
train_epochs.load_data()
# `x_train` is the epochs object itself (not an array); labels are read from
# the third column of the events array.
x_train = train_epochs
y_train = train_epochs.events[:, 2]

In [7]:
# Manual stage 1: fit an ICA on the loaded epochs and transform those same epochs.
# NOTE(review): this uses "fastica" while the pipelines in this notebook use
# "ext_infomax", and no channel selection or filtering is applied here.
x = get_ica_instance("fastica").fit(train_epochs).transform(train_epochs)

In [8]:
# Manual stage 2: replace `x` by the result of its `get_data()` method.
x = GetEpochsData().fit(x).transform(x)

In [9]:
# Manual stage 3: fit CSP on `x` with the training labels and transform `x`.
x = CSP(n_components=len(DEFAULT_CHANNELS), log=True).fit(x, y_train).transform(x)

In [10]:
# Manual stage 4: fit the scaler on `x` and apply it to the same data.
x = StandardScaler().fit(x).transform(x)

In [11]:
# Manual stage 5: fit LDA and predict on the data it was fitted on; from here
# on `x` holds predicted labels rather than features.
x = LinearDiscriminantAnalysis().fit(x, y_train).predict(x)

In [13]:
# Fraction of predictions equal to the training labels. This is a
# training-set score: every stage above was fitted on this same data.
(x == y_train).mean()

1.0

In [19]:
# Same pipeline with the ICA step disabled, fitted on the loaded epochs
# restricted to DEFAULT_CHANNELS.
# NOTE(review): `pick` is called on `train_epochs` twice; if it modifies the
# object in place, `train_epochs` stays channel-restricted for later cells.
clf = make_pipeline(
#     get_ica_instance("fastica"),
    GetEpochsData(),
    CSP(n_components=len(DEFAULT_CHANNELS), log=True),
    StandardScaler(),
    LinearDiscriminantAnalysis(),
).fit(train_epochs.pick(DEFAULT_CHANNELS), y_train)
# Training-set fraction of correct predictions.
(y_train == clf.predict(train_epochs.pick(DEFAULT_CHANNELS))).mean()

0.84

In [21]:
from ica_benchmark.processing.ica import get_ica_instance
class ICASklearnWrapper():
    """Adapter exposing an ICA object through the ``fit``/``transform`` protocol.

    Args:
        ica: object providing ``fit(x, **kwargs)`` and ``transform(x)``.
        fit_kwargs: optional keyword arguments forwarded to ``ica.fit`` on
            every call; ``None`` means no extra arguments.
    """

    def __init__(self, ica, fit_kwargs=None):
        # Zero-argument super(); the previous ``super(ICASklearnWrapper)``
        # call only re-initialised an unbound ``super`` proxy.
        super().__init__()
        self.ica = ica
        self.kwargs = fit_kwargs or dict()

    def fit(self, x, y=None):
        """Fit the wrapped ICA on ``x`` and return this wrapper.

        ``y`` is accepted for pipeline compatibility and ignored. Returning
        ``self`` (instead of whatever ``ica.fit`` returns) keeps the usual
        ``est.fit(x).transform(x)`` chaining on the wrapper itself.
        """
        self.ica.fit(x, **self.kwargs)
        return self

    def transform(self, x):
        """Return ``self.ica.transform(x)``."""
        return self.ica.transform(x)

# ICA -> raw data extraction -> CSP -> scaling -> LDA on every subject.
# `get_data=False` hands the epochs objects to the pipeline, so the ICA step
# receives epochs and GetEpochsData extracts the data afterwards.
results_df = experiment(
    make_pipeline(
        ICASklearnWrapper(get_ica_instance("ext_infomax")),
        GetEpochsData(),
        CSP(n_components=len(DEFAULT_CHANNELS), log=True),
        StandardScaler(),
        LinearDiscriminantAnalysis(),
    ),
    get_data=False
)
results_df.describe()

Unnamed: 0,uid,train_acc,test_acc
count,54.0,54.0,54.0
mean,27.5,0.882778,0.660185
std,15.732133,0.079915,0.173461
min,1.0,0.62,0.38
25%,14.25,0.8325,0.5225
50%,27.5,0.895,0.575
75%,40.75,0.9375,0.8125
max,54.0,1.0,0.97


In [22]:
# Segment length used for the three Welch size arguments below.
len_size = 1000

# ICA -> raw data extraction -> band power -> flatten -> scaling -> LDA on
# every subject.
# NOTE(review): `sfreq` is read from the notebook-global `train_epochs` left
# over from an earlier cell, not from the recordings loaded inside
# `experiment` — this cell fails if that variable is missing.
results_df = experiment(
    make_pipeline(
        ICASklearnWrapper(get_ica_instance("ext_infomax")),
        GetEpochsData(),
        PSD(
            sfreq=train_epochs.info["sfreq"],
            n_fft=1 * len_size,
            n_overlap=len_size // 4,
            n_per_seg=1 * len_size,
            average="mean",
            window="hamming",
        ),
        ConcatenateChannelsPSD(),
        StandardScaler(),
        LinearDiscriminantAnalysis()
    ),
    get_data=False
)
results_df.describe()

Unnamed: 0,uid,train_acc,test_acc
count,54.0,54.0,54.0
mean,27.5,0.847407,0.615926
std,15.732133,0.080028,0.132088
min,1.0,0.71,0.44
25%,14.25,0.78,0.52
50%,27.5,0.85,0.56
75%,40.75,0.91,0.7025
max,54.0,1.0,0.96


In [24]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA

# CSP -> PCA (10 components) -> scaling -> SVC with C=0.1 on every subject.
# `get_data=True` hands plain arrays to the pipeline.
results_df = experiment(
    make_pipeline(
        CSP(n_components=len(DEFAULT_CHANNELS), log=True),
        PCA(n_components=10),
        StandardScaler(),
        SVC(C=.1),
    ),
    get_data=True
)
results_df.describe()

Unnamed: 0,uid,train_acc,test_acc
count,54.0,54.0,54.0
mean,27.5,0.884815,0.644259
std,15.732133,0.06776,0.157727
min,1.0,0.69,0.4
25%,14.25,0.83,0.52
50%,27.5,0.895,0.595
75%,40.75,0.9275,0.7275
max,54.0,1.0,1.0


In [None]:
# Display the channel list used by default in `experiment`.
DEFAULT_CHANNELS