In [1]:
# Initialize the libs and download the dataset
!pip3 install mne -q
!pip3 install awscli
!aws s3 sync --no-sign-request s3://openneuro.org/ds003626 ds003626/



In [2]:
import mne
import warnings
import numpy as np
import pandas as pd

np.random.seed(23)

mne.set_log_level(verbose='warning') #to avoid info at terminal
warnings.filterwarnings(action = "ignore", category = DeprecationWarning )
warnings.filterwarnings(action = "ignore", category = FutureWarning )

In [3]:
# The root dir
root_dir = "./ds003626"

# Sampling rate
fs = 256

# Select the useful par of each trial. Time in seconds
t_start = 1.5
t_end = 3.5

In [4]:
from aux.pre_process import get_subjects_data_and_label

condition = "Inner"

data, labels = get_subjects_data_and_label(root_dir, condition, t_start = t_start, t_end = t_end, fs = fs)

In [5]:
print("Number of subjects", len(data))
print("Data shape: [trials x channels x samples]")
print("Shape", data[0].shape) # Trials, channels, samples

print("Labels")
print(len(labels)) # Time stamp, class , condition, session

Number of subjects 10
Data shape: [trials x channels x samples]
Shape (200, 128, 512)
Labels
10


In [6]:
data_array=np.vstack(data)
label_array=np.hstack(labels)
print(data_array.shape, label_array.shape)

(2236, 128, 512) (2236,)


In [7]:
# Define all the features
from scipy import stats

def mean(x):
    return np.mean(x, axis=-1)

def std(x):
    return np.std(x, axis=-1)

def ptp(x):
    return np.ptp(x, axis=-1)

def var(x):
    return np.var(x, axis=-1)

def minim(x):
    return np.min(x, axis=-1)

def maxim(x):
    return np.max(x, axis=-1)

def argminim(x):
    return np. argmin(x, axis=-1)

def argmaxim(x):
    return np.argmax(x,axis=-1)

def rms(x):
    return np.sqrt(np.mean(x**2, axis=-1))

def abs_diff_signal(x):
    return np.sum(np.abs(np.diff(x, axis=-1)), axis=-1)

def skewness(x):
    return stats.skew(x, axis=-1)

def kurtosis(x):
    return stats.kurtosis(x, axis=-1)

def concatenate_features(x):
    return np.concatenate((mean(x), std(x), ptp(x), var(x), minim(x), maxim(x), argminim(x),
                          argmaxim(x), rms(x), abs_diff_signal(x), skewness(x), kurtosis(x)), axis=-1)

In [8]:
features=[]
for d in data_array:
    features.append(concatenate_features(d))

In [9]:
features_array=np.array(features)
features_array.shape # 1536 / 128 = 12 features

(2236, 1536)

In [15]:
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn import metrics, model_selection
from sklearn.metrics import roc_auc_score, recall_score, precision_score, roc_curve
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# This section contains the function to support the model evaluation


def run_cross_validation(classifier, x_tr, y_tr):
    k_fold = model_selection.KFold(n_splits=10)
    results = model_selection.cross_val_score(classifier, x_tr, y_tr, cv=k_fold, scoring='accuracy')
    print('{:<50} {:.4f}'.format("Cross validation average accuracy with 10-fold:", (results.mean())))

def run_accuracy(y_tst, y_p):
    print('{:<50} {:.4f}'.format("Accuracy", (metrics.accuracy_score(y_tst, y_p))))

def plot_confusion_matrix(y_tst, y_pred, y_labels):
    lbs = y_labels.unique()
    confusion_matrix = metrics.confusion_matrix(y_tst, y_pred)
    matrix_df = pd.DataFrame(confusion_matrix)
    ax = plt.axes()
    sns.set(font_scale=1.3)
    plt.figure(figsize=(10,7))
    sns.heatmap(matrix_df, annot=True, fmt="g", ax=ax, cmap="magma")
    ax.set_title('Confusion Matrix')
    ax.set_xlabel("Predicted label", fontsize =15)
    ax.set_ylabel("True Label", fontsize=15)
    ax.set_yticklabels(list(lbs), rotation = 0)
    plt.show()

# Extract importance
def print_importance(classifier, x_tr):
    importance = pd.DataFrame({'feature': x_tr.columns, 'importance' : np.round(classifier.feature_importances_, 3)})
    importance.sort_values('importance', ascending=False, inplace = True)
    print(importance)

In [28]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

def run_random_forest():
    print("Random Forest")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.30) # 70% test and 30% training
    clf = RandomForestClassifier(n_estimators=100)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))
    evaluate_model(clf, x_train, x_test, y_train, y_test)

run_random_forest()

Random Forest
Accuracy                                           0.2668
Cross validation average accuracy with 10-fold:    0.2441
              precision    recall  f1-score   support

           0       0.26      0.21      0.23       183
           1       0.32      0.32      0.32       169
           2       0.23      0.30      0.26       156
           3       0.26      0.25      0.25       163

    accuracy                           0.27       671
   macro avg       0.27      0.27      0.27       671
weighted avg       0.27      0.27      0.27       671



  _warn_prf(average, modifier, msg_start, len(result))


AxisError: axis 1 is out of bounds for array of dimension 1

In [23]:
# Neural Network
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

def run_neural_network():
    print("Neural Network")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.20) # 80% test and 20% training
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))
    #plot_confusion_matrix(y_test, y_pred, label_array)

run_neural_network()

Neural Network
Accuracy                                           0.2210
Cross validation average accuracy with 10-fold:    0.2562
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       117
           1       0.00      0.00      0.00       125
           2       0.00      0.00      0.00       107
           3       0.22      1.00      0.36        99

    accuracy                           0.22       448
   macro avg       0.06      0.25      0.09       448
weighted avg       0.05      0.22      0.08       448



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
# Multiclass Random Forest
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier

def run_random_forest_multi_class():
    print("Random Forest")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.20) # 80% test and 20% training
    rf = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 200, max_depth=8, criterion='gini')
    clf = OneVsRestClassifier(rf)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))
    #print_importance(clf, x_train)
    #plot_confusion_matrix(y_test, y_pred)

run_random_forest_multi_class()

Random Forest
Accuracy                                           0.2522
Cross validation average accuracy with 10-fold:    0.2344
              precision    recall  f1-score   support

           0       0.37      0.17      0.24       126
           1       0.27      0.33      0.30       111
           2       0.20      0.24      0.21       101
           3       0.23      0.27      0.25       110

    accuracy                           0.25       448
   macro avg       0.27      0.25      0.25       448
weighted avg       0.27      0.25      0.25       448



In [21]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

def run_random_forest(X, y):
    print("Random Forest")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20) # 80% test and 20% training
    clf = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 200, max_depth=8, criterion='gini')

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))
    #print_importance(clf, x_train)
    #plot_confusion_matrix(y_test, y_pred)

run_random_forest(features_array, label_array)

Random Forest
Accuracy                                           0.2165
Cross validation average accuracy with 10-fold:    0.2321
              precision    recall  f1-score   support

           0       0.24      0.16      0.19       122
           1       0.24      0.27      0.25       102
           2       0.20      0.19      0.19       117
           3       0.20      0.26      0.23       107

    accuracy                           0.22       448
   macro avg       0.22      0.22      0.22       448
weighted avg       0.22      0.22      0.21       448



In [None]:
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split

def run_linear_svc_multi_class():
    print("Linear SVC")
    # Split dataset into training set and test set
    x_train, x_test, y_train, y_test = train_test_split(features_array, label_array, test_size=0.30) # 70% test and 30% training
    rf = LinearSVC(random_state=0, max_iter=10000)
    clf = OneVsRestClassifier(rf)

    # Train the model
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    run_accuracy(y_test, y_pred)
    run_cross_validation(clf, x_train, y_train)
    print(classification_report(y_test, y_pred))

run_linear_svc_multi_class()

Linear SVC




Accuracy                                           0.2742


