In [1]:
import mne
import warnings
import numpy as np
import pandas as pd

np.random.seed(23)

mne.set_log_level(verbose='warning') #to avoid info at terminal
warnings.filterwarnings(action = "ignore", category = DeprecationWarning )
warnings.filterwarnings(action = "ignore", category = FutureWarning )

In [2]:
# The root dir
root_dir = "./ds003626"

# Sampling rate
fs = 256

# Select the useful par of each trial. Time in seconds
t_start = 1.5
t_end = 3.5

In [3]:
import mne
mne.utils.set_config('MNE_USE_CUDA', 'true')
mne.cuda.init_cuda(verbose=True)

Now using CUDA device 0
Enabling CUDA with 9.24 GB available memory


In [5]:
from aux.pre_process import get_subjects_data_and_label, get_subjects_data_and_label2

condition = "Inner"

data, labels = get_subjects_data_and_label2(root_dir, condition, t_start = t_start, t_end = t_end, fs = fs)

100%|██████████| 200/200 [03:31<00:00,  1.06s/it]
100%|██████████| 200/200 [03:30<00:00,  1.05s/it]
100%|██████████| 100/100 [01:45<00:00,  1.05s/it]
100%|██████████| 200/200 [03:29<00:00,  1.05s/it]
100%|██████████| 200/200 [03:30<00:00,  1.05s/it]
100%|██████████| 200/200 [03:29<00:00,  1.05s/it]
100%|██████████| 200/200 [03:29<00:00,  1.05s/it]
100%|██████████| 200/200 [03:30<00:00,  1.05s/it]
100%|██████████| 100/100 [01:45<00:00,  1.05s/it]
100%|██████████| 200/200 [03:29<00:00,  1.05s/it]
100%|██████████| 200/200 [03:29<00:00,  1.05s/it]
100%|██████████| 200/200 [03:30<00:00,  1.05s/it]
100%|██████████| 200/200 [03:33<00:00,  1.07s/it]
100%|██████████| 200/200 [03:33<00:00,  1.07s/it]
100%|██████████| 200/200 [03:32<00:00,  1.06s/it]
100%|██████████| 200/200 [03:34<00:00,  1.07s/it]
100%|██████████| 200/200 [03:35<00:00,  1.08s/it]
100%|██████████| 140/140 [02:30<00:00,  1.07s/it]
100%|██████████| 200/200 [03:34<00:00,  1.07s/it]
100%|██████████| 200/200 [03:34<00:00,  1.07s/it]


In [8]:
print("Number of subjects", len(data))
print("Data shape: [trials x channels x samples]")
print("Shape", data[0].shape) # Trials, channels, samples

print("Labels")
print(len(labels)) # Time stamp, class , condition, session

Number of subjects 10
Data shape: [trials x channels x samples]
Shape (200, 27648)
Labels
10


In [15]:
data_array=np.vstack(data)
label_array=np.hstack(labels)
features_array=data_array
print(data_array.shape, label_array.shape)

(2236, 27648) (2236,)


In [16]:
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn import metrics, model_selection
from sklearn.metrics import roc_auc_score, recall_score, precision_score, roc_curve
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# This section contains the function to support the model evaluation


def run_cross_validation(classifier, x_tr, y_tr):
    k_fold = model_selection.KFold(n_splits=10)
    results = model_selection.cross_val_score(classifier, x_tr, y_tr, cv=k_fold, scoring='accuracy')
    print('{:<50} {:.4f}'.format("Cross validation average accuracy with 10-fold:", (results.mean())))

def run_accuracy(y_tst, y_p):
    print('{:<50} {:.4f}'.format("Accuracy", (metrics.accuracy_score(y_tst, y_p))))

def plot_confusion_matrix(y_tst, y_pred, y_labels):
    lbs = y_labels.unique()
    confusion_matrix = metrics.confusion_matrix(y_tst, y_pred)
    matrix_df = pd.DataFrame(confusion_matrix)
    ax = plt.axes()
    sns.set(font_scale=1.3)
    plt.figure(figsize=(10,7))
    sns.heatmap(matrix_df, annot=True, fmt="g", ax=ax, cmap="magma")
    ax.set_title('Confusion Matrix')
    ax.set_xlabel("Predicted label", fontsize =15)
    ax.set_ylabel("True Label", fontsize=15)
    ax.set_yticklabels(list(lbs), rotation = 0)
    plt.show()

# Extract importance
def print_importance(classifier, x_tr):
    importance = pd.DataFrame({'feature': x_tr.columns, 'importance' : np.round(classifier.feature_importances_, 3)})
    importance.sort_values('importance', ascending=False, inplace = True)
    print(importance)

In [17]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

def run_random_forest():
    print("Random Forest")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.30) # 70% test and 30% training
    clf = RandomForestClassifier(n_estimators=100)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))

run_random_forest()

Random Forest
Accuracy                                           0.2355
Cross validation average accuracy with 10-fold:    0.2249
              precision    recall  f1-score   support

           0       0.30      0.20      0.24       183
           1       0.25      0.25      0.25       169
           2       0.22      0.29      0.25       156
           3       0.20      0.21      0.20       163

    accuracy                           0.24       671
   macro avg       0.24      0.24      0.24       671
weighted avg       0.24      0.24      0.24       671



In [18]:
# Neural Network
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

def run_neural_network():
    print("Neural Network")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.20) # 80% test and 20% training
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))
    #plot_confusion_matrix(y_test, y_pred, label_array)

run_neural_network()

Neural Network
Accuracy                                           0.2165


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Cross validation average accuracy with 10-fold:    0.2344
              precision    recall  f1-score   support

           0       0.18      0.03      0.05       137
           1       0.00      0.00      0.00       103
           2       0.00      0.00      0.00       106
           3       0.22      0.91      0.35       102

    accuracy                           0.22       448
   macro avg       0.10      0.24      0.10       448
weighted avg       0.11      0.22      0.10       448



In [19]:
# Multiclass Random Forest
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier

def run_random_forest_multi_class():
    print("Random Forest")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.20) # 80% test and 20% training
    rf = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 200, max_depth=8, criterion='gini')
    clf = OneVsRestClassifier(rf)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))
    #print_importance(clf, x_train)
    #plot_confusion_matrix(y_test, y_pred)

run_random_forest_multi_class()

Random Forest
Accuracy                                           0.2254
Cross validation average accuracy with 10-fold:    0.2137
              precision    recall  f1-score   support

           0       0.25      0.21      0.23       112
           1       0.36      0.15      0.21       134
           2       0.18      0.24      0.20       102
           3       0.20      0.34      0.25       100

    accuracy                           0.23       448
   macro avg       0.25      0.23      0.22       448
weighted avg       0.26      0.23      0.22       448



In [20]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

def run_random_forest(X, y):
    print("Random Forest")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20) # 80% test and 20% training
    clf = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 200, max_depth=8, criterion='gini')

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))
    #print_importance(clf, x_train)
    #plot_confusion_matrix(y_test, y_pred)

run_random_forest(features_array, label_array)

Random Forest
Accuracy                                           0.2634
Cross validation average accuracy with 10-fold:    0.2209
              precision    recall  f1-score   support

           0       0.28      0.21      0.24       114
           1       0.23      0.30      0.26       103
           2       0.30      0.21      0.25       121
           3       0.26      0.34      0.30       110

    accuracy                           0.26       448
   macro avg       0.27      0.27      0.26       448
weighted avg       0.27      0.26      0.26       448



In [None]:
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split

def run_linear_svc_multi_class():
    print("Linear SVC")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.20) # 80% test and 20% training
    rf = LinearSVC(random_state=0, max_iter=10000)
    clf = OneVsRestClassifier(rf)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))

run_linear_svc_multi_class()

Linear SVC




Accuracy                                           0.2746


In [None]:
import tensorflow as tf
from matplotlib import rcParams
tf.random.set_seed(42)

def plot_result(history):
    rcParams['figure.figsize'] = (18, 8)
    rcParams['axes.spines.top'] = False
    rcParams['axes.spines.right'] = False

    plt.plot(
    np.arange(1, 101),
        history.history['loss'], label='Loss'
    )
    plt.plot(
        np.arange(1, 101),
        history.history['accuracy'], label='Accuracy'
    )
    plt.plot(
        np.arange(1, 101),
        history.history['precision'], label='Precision'
    )
    plt.plot(
        np.arange(1, 101),
        history.history['recall'], label='Recall'
    )
    plt.title('Evaluation metrics', size=20)
    plt.xlabel('Epoch', size=14)
    plt.legend()

def run_prediction(model, x_test):
    predictions = model.predict(x_test)

def run_tensor_flow():
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.20) # 80% test and 20% training
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        optimizer=tf.keras.optimizers.Adam(lr=0.03),
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall')
        ]
    )
    history = model.fit(x_train, y_train, epochs=100)

    plot_result(history)
    run_prediction(model, x_test)

run_tensor_flow()