In [1]:
import mne
from mne.decoding import Scaler, Vectorizer, CSP, cross_val_multiscore
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import glob
from scipy import io, stats

In [2]:
from sklearn.model_selection import train_test_split, cross_val_score, ShuffleSplit
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_selection import SelectKBest
from sklearn.base import BaseEstimator, TransformerMixin

In [3]:
def channel_selection(X, y):
    """Build a boolean channel mask from trial-averaged inter-channel correlation.

    For every trial the channel-by-channel correlation matrix is computed with
    ``np.corrcoef``; these matrices are averaged over trials. A channel's score
    is the number of entries in its row of the averaged matrix that exceed 0.5
    (the diagonal self-correlation is part of that row). Channels whose score
    is strictly greater than the median score are marked ``True``.

    Parameters
    ----------
    X : ndarray, shape (n_trials, n_channels, n_samples)
        Must be three-dimensional (the shape is unpacked into three values).
    y : any
        Accepted for signature compatibility; not used by the computation.

    Returns
    -------
    ndarray of bool, shape (n_channels,)
    """
    n_trials, n_channels, _ = X.shape

    # One correlation matrix per trial, stacked along the first axis.
    per_trial_corr = np.empty((n_trials, n_channels, n_channels))
    for trial_idx, trial_data in enumerate(X):
        per_trial_corr[trial_idx] = np.corrcoef(trial_data)

    mean_corr = per_trial_corr.mean(axis=0)

    # Row-wise count of averaged correlations above the 0.5 threshold.
    strong_link_counts = (mean_corr > 0.5).sum(axis=1)

    return strong_link_counts > np.median(strong_link_counts)

In [4]:
def feature_extraction(X):
    """Summarise the last axis of ``X`` with six descriptive statistics.

    The statistics are computed along the last axis and joined along the
    (new) last axis in this fixed order: mean, max, min, standard deviation,
    skewness, kurtosis. For an input of shape ``(..., n_channels, n_samples)``
    the result therefore has shape ``(..., 6 * n_channels)``.
    """
    # Order matters: it defines the column layout of the returned features.
    statistics = (np.mean, np.amax, np.amin, np.std, stats.skew, stats.kurtosis)

    feature_blocks = [statistic(X, axis=-1) for statistic in statistics]

    return np.concatenate(feature_blocks, axis=-1)

In [5]:
from skrebate import ReliefF

class ReliefF_Selector(BaseEstimator, TransformerMixin):
    """Transformer wrapper around a ``ReliefF`` scorer that keeps 20 columns.

    ``fit`` trains the wrapped scorer; ``transform`` returns the columns of
    ``X`` listed in the first 20 entries of the scorer's ``top_features_``.
    """

    def __init__(self):
        # Wrapped scorer, configured with 10 neighbours.
        self.reliefF = ReliefF(n_neighbors=10)

    def fit(self, X, y=None):
        """Fit the wrapped ReliefF scorer on ``(X, y)`` and return ``self``."""
        self.reliefF.fit(X, y)
        return self

    def transform(self, X, y=None):
        """Return ``X`` restricted to the first 20 indices in ``top_features_``."""
        kept_columns = self.reliefF.top_features_[:20]
        return X[:, kept_columns]

In [6]:
from mrmr import mrmr_classif

class MRMR_Selector(BaseEstimator, TransformerMixin):
    """Transformer that keeps the 20 columns chosen by ``mrmr_classif``."""

    def __init__(self):
        """No configuration; the selection is computed in ``fit``."""

    def fit(self, X, y=None):
        """Run ``mrmr_classif`` (K=20) on copies of ``X`` and ``y``; return ``self``.

        ``X`` is wrapped in a DataFrame with default column labels, so the
        value stored in ``selected_features`` is whatever ``mrmr_classif``
        returns for those labels; ``transform`` uses it to index columns.
        """
        feature_frame = pd.DataFrame(X, copy=True)
        target = pd.Series(y, copy=True)

        self.selected_features = mrmr_classif(
            X=feature_frame,
            y=target,
            K=20,
            show_progress=False,
        )
        return self

    def transform(self, X, y=None):
        """Return the columns of ``X`` stored in ``selected_features``."""
        chosen = self.selected_features
        return X[:, chosen]

In [7]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import chi2 

class Chi2_Selector(BaseEstimator, TransformerMixin):
    """Select 20 features by chi-squared score after min-max scaling.

    The features are min-max scaled before scoring (the scaler and the
    ``SelectKBest(chi2, k=20)`` selector are both fitted in ``fit``).
    ``transform`` returns the *scaled* values of the selected columns.
    """

    def __init__(self):
        self.chi2_selector = SelectKBest(chi2, k=20)
        self.scaler = MinMaxScaler()

    def fit(self, X, y=None):
        """Fit the scaler on ``X``, then fit the chi2 selector on the scaled data."""
        X_scaled = self.scaler.fit_transform(X)
        self.chi2_selector.fit(X_scaled, y)
        return self

    def transform(self, X, y=None):
        """Scale ``X`` with the already-fitted scaler and keep the selected columns.

        The scaler is applied with ``transform`` (not ``fit_transform``) so the
        scaling learned in ``fit`` is reused; refitting here would rescale
        every new batch by its own min/max and leak held-out statistics.
        """
        X_scaled = self.scaler.transform(X)
        return self.chi2_selector.transform(X_scaled)

In [8]:
from sklearn.feature_selection import f_classif

class ANOVA_Selector(BaseEstimator, TransformerMixin):
    """Thin transformer wrapper around ``SelectKBest(f_classif, k=20)``."""

    def __init__(self):
        # Wrapped univariate selector; all work is delegated to it.
        self.anova_selector = SelectKBest(f_classif, k=20)

    def fit(self, X, y=None):
        """Fit the wrapped selector on ``(X, y)`` and return ``self``."""
        self.anova_selector.fit(X, y)
        return self

    def transform(self, X, y=None):
        """Return the columns of ``X`` kept by the fitted selector."""
        reduced = self.anova_selector.transform(X)
        return reduced

In [9]:
class KruskalWallis_Selector(BaseEstimator, TransformerMixin):
    """Keep the 20 features with the smallest Kruskal-Wallis p-values.

    For each feature column, the samples are split by class label and a
    Kruskal-Wallis H-test is run across those per-class groups. Features are
    ranked by ascending p-value and the first 20 indices are retained.

    Attributes
    ----------
    p_values : list of float
        One p-value per feature column, from the most recent ``fit`` only.
    selected_features : ndarray of int
        Column indices kept by ``transform`` (at most 20).
    """

    def __init__(self):
        self.p_values = []

    def fit(self, X, y=None):
        """Score every column of ``X`` against the classes in ``y``.

        Raises
        ------
        ValueError
            If ``y`` is None or contains fewer than two distinct classes.
        """
        if y is None:
            raise ValueError("KruskalWallis_Selector.fit requires class labels y")

        X = np.asarray(X)
        y = np.asarray(y)

        class_labels = np.unique(y)
        if class_labels.size < 2:
            raise ValueError("KruskalWallis_Selector.fit needs at least two classes in y")

        class_masks = [y == label for label in class_labels]

        # Start from an empty list on every fit so repeated fits (e.g. one per
        # CV fold) do not rank p-values left over from an earlier fit.
        p_values = []
        for idx in range(X.shape[1]):
            column = X[:, idx]
            try:
                # Compare the feature's distribution ACROSS classes.
                _, p_value = stats.kruskal(*[column[mask] for mask in class_masks])
            except ValueError:
                # scipy raises when all values are identical; such a feature
                # carries no class information, so rank it last.
                p_value = 1.0
            if np.isnan(p_value):
                p_value = 1.0
            p_values.append(p_value)

        self.p_values = p_values
        self.selected_features = np.argsort(self.p_values)[:20]
        return self

    def transform(self, X, y=None):
        """Return the columns of ``X`` listed in ``selected_features``."""
        return X[:, self.selected_features]

In [34]:
# Dataset tag interpolated into the epoch file paths.
dataset = 'A'

# Zero-padded subject IDs "01" .. "29".
subjects = [f"{number:02d}" for number in range(1, 30)]

# NOTE(review): these look like the subjects scoring near chance level in the
# printed results — confirm the exclusion criterion.
bad_subjects = ["04", "05", "06", "07", "08", "22"]

# Every subject that is not listed as bad, in the original order.
good_subjects = [subject_id for subject_id in subjects if subject_id not in bad_subjects]

In [37]:
# Per-subject evaluation: statistical features -> standardisation -> LDA,
# scored with 5 random 80/20 splits (fixed seed) and summarised across subjects.
scores = []

for subject in subjects:
    epochs_eeg = mne.read_epochs(f"epochs_eeg/Dataset_{dataset}/subject_{subject}_epo.fif", preload=True, verbose=False)
    # epochs_fnirs = mne.read_epochs(f"epochs_fnirs/Dataset_{dataset}/subject_{subject}_epo.fif", preload=True, verbose=False)

    # Keep the tmin=0..tmax=10 window and only EEG channels not marked bad.
    epochs_eeg.crop(tmin=0, tmax=10).pick(picks=["eeg"], exclude='bads')
    # epochs_fnirs.crop(tmin=0, tmax=10).pick(picks=["hbo"], exclude='bads')

    X_eeg = epochs_eeg.get_data()
    y = epochs_eeg.events[:, -1]  # last events column is used as the class label

    # X_fnirs = epochs_fnirs.get_data()
    # y = epochs_fnirs.events[:, -1]

    # The features are computed per epoch and do not depend on the split, so
    # extract them once per subject instead of once per fold.
    # NOTE: if per-fold channel selection is re-enabled (see below), extraction
    # must move back inside the fold loop, applied to the selected channels.
    X_features = feature_extraction(X_eeg)
    # X_features = feature_extraction(X_fnirs)
    # X_features = np.concatenate((X_features_eeg, X_features_fnirs), axis=-1)

    cv = ShuffleSplit(5, test_size=0.2, random_state=42)

    cv_scores = []

    for train_idx, test_idx in cv.split(y):
        # selected_eeg_channels = np.where(channel_selection(X_eeg[train_idx], y[train_idx]))[0]
        # selected_fnirs_channels = np.where(channel_selection(X_fnirs[train_idx], y[train_idx]))[0]

        # Fresh estimators per fold; the scaler only ever sees training rows.
        scaler = StandardScaler()
        classifier = LinearDiscriminantAnalysis()

        X_train = scaler.fit_transform(X_features[train_idx])
        X_test = scaler.transform(X_features[test_idx])

        # Optional feature selection (disabled):
        # selector = ReliefF_Selector()
        # X_train = selector.fit(X_train, y[train_idx]).transform(X_train)
        # X_test = selector.transform(X_test)

        classifier.fit(X_train, y[train_idx])
        y_pred = classifier.predict(X_test)

        cv_scores.append(accuracy_score(y[test_idx], y_pred))

    print(f"Subject_{subject}: {(np.mean(cv_scores)*100):.2f} ± {(np.std(cv_scores)*100):.2f} %")

    scores.append(np.mean(cv_scores))

print(f"Score: {(np.mean(scores)*100):.2f} ± {(np.std(scores)*100):.2f} %")
# np.save(f"scores/Dataset_{dataset}/WOCS_fNIRS_WOFS_LDA.npy", scores)
    

Subject_01: 66.67 ± 11.79 %
Subject_02: 63.33 ± 17.95 %
Subject_03: 58.33 ± 5.27 %
Subject_04: 41.67 ± 9.13 %
Subject_05: 48.33 ± 12.25 %
Subject_06: 48.33 ± 13.33 %
Subject_07: 51.67 ± 12.25 %
Subject_08: 43.33 ± 15.28 %
Subject_09: 76.67 ± 9.72 %
Subject_10: 56.67 ± 14.34 %
Subject_11: 66.67 ± 10.54 %
Subject_12: 56.67 ± 13.33 %
Subject_13: 68.33 ± 9.72 %
Subject_14: 68.33 ± 12.25 %
Subject_15: 60.00 ± 12.25 %
Subject_16: 80.00 ± 20.14 %
Subject_17: 61.67 ± 16.33 %
Subject_18: 60.00 ± 15.28 %
Subject_19: 90.00 ± 8.16 %
Subject_20: 63.33 ± 11.30 %
Subject_21: 65.00 ± 6.24 %
Subject_22: 46.67 ± 6.67 %
Subject_23: 80.00 ± 6.67 %
Subject_24: 60.00 ± 6.24 %
Subject_25: 78.33 ± 6.67 %
Subject_26: 63.33 ± 8.50 %
Subject_27: 86.67 ± 11.30 %
Subject_28: 63.33 ± 8.50 %
Subject_29: 56.67 ± 6.24 %
Score: 63.10 ± 12.06 %
