In [4]:
import os
from os import listdir
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from mne.decoding import CSP
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.signal import butter, lfilter

# all subjects csv files folder

DATA_FOLDER = "output"

# subject wise performance result csv

SUBJECT_WISE_PERFORMANCE_METRIC_CSV_Filter = "csp_lda_subject_performance_metric_Filter.csv"


# list all the subject wise csv files
def find_csv_filenames(path_to_dir, suffix=".csv"):
    files = listdir(path_to_dir)
    return [os.path.join(path_to_dir, files) for files in files if files.endswith(suffix)]



# load dataframe from csv
def load_data(filename):
    # read csv file
    df = pd.read_csv(filename)
    return df


# Pre-process data with Lowerpass bank filter and model building

class FilterBank(BaseEstimator, TransformerMixin):

# obtained from https://www.kaggle.com/eilbeigi/visual/data
# author: fornax, alexandre

    """Filterbank TransformerMixin.
    Return signal processed by a bank of butterworth filters.
    """

    def __init__(self, filters='LowpassBank'):
        """init."""
        if filters == 'LowpassBank':
            self.freqs_pairs = [[0.5], [1], [2], [3], [4], [5], [7], [9], [15],
                                [30]]
        else:
            self.freqs_pairs = filters
        self.filters = filters


    def transform(self, X, y=None):
        """Transform. Apply filters."""
        X_tot = None
        for freqs in self.freqs_pairs:
            if len(freqs) == 1:
                b, a = butter(5, freqs[0] / 250.0, btype='lowpass')
            else:
                if freqs[1] - freqs[0] < 3:
                    b, a = butter(3, np.array(freqs) / 250.0, btype='bandpass')
                else:
                    b, a = butter(5, np.array(freqs) / 250.0, btype='bandpass')
            X_filtered = lfilter(b, a, X, axis=0)
            X_tot = X_filtered if X_tot is None else np.c_[X_tot, X_filtered]

        return X_tot

def preprocessData(data):
    """Preprocess data with filterbank."""
    fb = FilterBank()
    return fb.transform(data)


#initializing the model

if __name__ == '__main__':
    filenames = find_csv_filenames(DATA_FOLDER)

    for name in filenames:
        extracted_filename = os.path.basename(name)
        # split the filename by first underscore
        lhs, rhs = extracted_filename.split("_", 1)

        subject_df = load_data(name)

        # pop out label from dataframe
        y = subject_df.pop("class")

        # pre-processing data using Lower pass filter
        X = preprocessData(subject_df.values)
        print(X.shape)

        # data splitting into training & testing dataset
        x_tr, x_ts, y_tr, y_ts = train_test_split(X, y.values, test_size=0.2, shuffle=True)

        # Reshape and normalize training data
        x_tr = x_tr.reshape(x_tr.shape[0], 8, 20)
        x_tr = x_tr / 255.0

        x_ts = x_ts.reshape(x_ts.shape[0], 8, 20)
        x_ts = x_ts / 255.0

        # Common Spatial Pattern (CSP)- feature extraction technique
        csp = CSP(n_components=3)

        print(x_tr.shape, y_tr.shape)

        x_tr = csp.fit_transform(x_tr, y_tr)
        x_ts = csp.transform(x_ts)

        # Assemble a classifier
        lda = LinearDiscriminantAnalysis()

        # fit classifier
        lda.fit(x_tr, y_tr)

        train_accuracy = lda.score(x_tr, y_tr)
        test_accuracy = lda.score(x_ts, y_ts)

        print("TRAIN ACCURACY SCORE: ", train_accuracy)
        print("TEST ACCURACY SCORE: ", test_accuracy)

        test_pred= lda.predict(x_ts)

(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.083 (2.2e-16 eps * 8 dim * 4.7e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 0.093 (2.2e-16 eps * 8 dim * 5.2e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
TRAIN ACCURACY SCORE:  0.512747668997669
TEST ACCURACY SCORE:  0.5201881451881452
(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.11 (2.2e-16 eps * 8 dim * 6.2e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPI

(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.17 (2.2e-16 eps * 8 dim * 9.5e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 0.18 (2.2e-16 eps * 8 dim * 1e+14  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
TRAIN ACCURACY SCORE:  0.5300740925740925
TEST ACCURACY SCORE:  0.53005328005328
(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 1.4 (2.2e-16 eps * 8 dim * 7.9e+14  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL


(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.11 (2.2e-16 eps * 8 dim * 6.2e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 0.1 (2.2e-16 eps * 8 dim * 5.8e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
TRAIN ACCURACY SCORE:  0.5190122377622378
TEST ACCURACY SCORE:  0.5192307692307693
(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.089 (2.2e-16 eps * 8 dim * 5e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRIC

(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.14 (2.2e-16 eps * 8 dim * 7.9e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 0.14 (2.2e-16 eps * 8 dim * 8e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
TRAIN ACCURACY SCORE:  0.5384719447219447
TEST ACCURACY SCORE:  0.5441641691641692
(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.079 (2.2e-16 eps * 8 dim * 4.4e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRI

(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.34 (2.2e-16 eps * 8 dim * 1.9e+14  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 0.082 (2.2e-16 eps * 8 dim * 4.6e+13  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPIRICAL
Done.
TRAIN ACCURACY SCORE:  0.4960872460872461
TEST ACCURACY SCORE:  0.4926739926739927
(120120, 160)
(96096, 8, 20) (96096,)
Computing rank from data with rank=None
    Using tolerance 0.59 (2.2e-16 eps * 8 dim * 3.3e+14  max singular value)
    Estimated rank (mag): 8
    MAG: rank 8 computed from 8 data channels with 0 projectors
Reducing data rank from 8 -> 8
Estimating covariance using EMPI

In [5]:
from joblib import dump, load
dump(lda, 'tetris_model.joblib')

['tetris_model.joblib']