In [1]:
# Init libraries
import warnings
import mne
import numpy as np
from sklearn.exceptions import ConvergenceWarning

np.random.seed(23)
mne.set_log_level(verbose='warning') #to avoid info at terminal
warnings.filterwarnings(action = "ignore", category = DeprecationWarning )
warnings.filterwarnings(action = "ignore", category = FutureWarning )
warnings.filterwarnings(action = "ignore", category = ConvergenceWarning )

In [2]:
# Project defaults
# The root dir
root_dir = "./ds003626"

# Sampling rate
fs = 256

# Select the useful par of each trial. Time in seconds
t_start = 1.5
t_end = 3.5

In [3]:
# Load dataset
from aux.pre_process import get_subjects_data_and_label

condition = "Inner"
data, labels = get_subjects_data_and_label(root_dir, condition, t_start = t_start, t_end = t_end, fs = fs)
data_array=np.vstack(data)
label_array=np.hstack(labels)

In [4]:
from scipy import integrate
# Define all the features
from scipy import stats
import antropy as ant

def mean(x):
    return np.mean(x, axis=-1)

def std(x):
    return np.std(x, axis=-1)

def ptp(x):
    return np.ptp(x, axis=-1)

def var(x):
    return np.var(x, axis=-1)

def minim(x):
    return np.min(x, axis=-1)

def maxim(x):
    return np.max(x, axis=-1)

def argminim(x):
    return np. argmin(x, axis=-1)

def argmaxim(x):
    return np.argmax(x,axis=-1)

def rms(x):
    return np.sqrt(np.mean(x**2, axis=-1))

def abs_diff_signal(x):
    return np.sum(np.abs(np.diff(x, axis=-1)), axis=-1)

def skewness(x):
    return stats.skew(x, axis=-1)

def kurtosis(x):
    return stats.kurtosis(x, axis=-1)

def f_minplusmax(x):
    return np.max(x, axis=-1) + np.min(x, axis=-1)

def f_maxminusmin(x):
    return np.max(x, axis=-1) - np.min(x, axis=-1)

def f_spec_entropy(x):
    return ant.spectral_entropy(x, fs, method="welch", normalize=True, axis=-1)

def f_integral(x):
    return integrate.simps(x, axis=-1)

def f_petrosian(x):
    return ant.petrosian_fd(x, axis=-1)

def f_katz(x):
    return ant.katz_fd(x, axis=-1)

def concatenate_features(x):
    # Uncomment the desired line to add the feature
    return np.concatenate((
        mean(x),
        std(x),
        ptp(x),
        var(x),
        minim(x),
        maxim(x),
        argminim(x),
        argmaxim(x),
        rms(x),
        abs_diff_signal(x),
        skewness(x),
        kurtosis(x),
        # f_minplusmax(x),
        # f_maxminusmin(x),
        # f_spec_entropy(x),
        # f_integral(x),
        # f_katz(x),
        # f_petrosian(x),
    ), axis=-1)

In [5]:
features=[]
for d in data_array:
    features.append(concatenate_features(d))
features_array=np.array(features)

In [None]:
#search for best hyperparameters and do inner cross validation

In [13]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV


In [14]:
#from sklearn.multiclass import OneVsRestClassifier
#from sklearn.svm import LinearSVC
#from sklearn.neural_network import MLPClassifier
#from sklearn.ensemble import RandomForestClassifier

random_state = 42
splits = [0.10, 0.20, 0.30]

classifiers = [
    ["Random Forest", RandomForestClassifier(random_state=random_state, max_features='auto', n_estimators= 200, max_depth=8, criterion='gini')],
    ["Random Forest - OneVsRestClassifier", OneVsRestClassifier(RandomForestClassifier(random_state=random_state, max_features='auto', n_estimators= 200, max_depth=8, criterion='gini'))],
    ["Neural Network", MLPClassifier(random_state=random_state)],
    ["Linear SVC", LinearSVC(random_state=random_state, max_iter=10000)],
]

In [15]:
#from sklearn.model_selection import StratifiedKFold, cross_val_score
def run_cross_validation(classifier, x_tr, y_tr):
    # Changed to use StratifiedKFold
    outer_cv = StratifiedKFold(n_splits=3)
    results = cross_val_score(classifier, x_tr, y_tr, cv=outer_cv, scoring='accuracy')
    return results.mean()

In [16]:
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel, SelectKBest, chi2
from sklearn import metrics, model_selection
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Run the experiment

# Prepare the data
X = features_array
y = label_array
X = MinMaxScaler().fit_transform(X)

# Feature Selection - There are other parameters we could set for Feature Selection
print("Old shape: ", X.shape)

# Select one fs here
fs = LinearSVC(C=0.01, penalty="l2", dual=False).fit(X, y)
# fs = SVC(kernel="linear").fit(X, y)
# fs = ExtraTreesClassifier(n_estimators=50).fit(X, y)

model = SelectFromModel(fs, prefit=True)
X = model.transform(X)

# or use this one
# X = SelectKBest(chi2, k=100).fit_transform(X, y)

print("New shape: ", X.shape)



Old shape:  (2236, 1536)
New shape:  (2236, 549)


In [29]:
# Random Forest

param_grid_rf = {
    'criterion' : ['gini', 'entropy'],
    'n_estimators' : [100, 150, 200],
    'max_depth' : [None, 1, 3, 5, 10],
    'min_samples_split' : [5, 10],
    'min_samples_leaf' : [5, 10]
}

random_state = 42
inner_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

model_to_tune = RandomForestClassifier()
randomForest = GridSearchCV(estimator=model_to_tune, param_grid=param_grid_rf, cv=inner_cv)
randomForest.fit(X,y)
print(f"The best parameters found are: {randomForest.best_params_}")
print(f"The mean CV score of the best model is: {randomForest.best_score_:.3f}")

# This can be used to name the classifier with all the right parameters already in it
bestRandomForest = randomForest.best_estimator_

The best parameters found are: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 10, 'min_samples_split': 5, 'n_estimators': 100}
The mean CV score of the best model is: 0.265


In [26]:
# Random Forest one vs rest doesn't work in a Grid Search
# Leaving the code as part of documentation

param_grid_rf = {
    'criterion' : ['gini', 'entropy'],
    'n_estimators' : [100, 150, 200],
    'max_depth' : [None, 1, 3, 5, 10],
    'min_samples_split' : [5, 10],
    'min_samples_leaf' : [5, 10]
}

model_to_tune = RandomForestClassifier()
rf = GridSearchCV(estimator=model_to_tune, param_grid=param_grid_rf, cv=inner_cv)
model = OneVsRestClassifier(rf)
model.fit(X,y)
print(f"The best parameters found are: {model.best_params_}")
print(f"The mean CV score of the best model is: {model.best_score_:.3f}")

AttributeError: 'OneVsRestClassifier' object has no attribute 'best_params_'

In [30]:
#Linear SVC
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

SVCpipe = Pipeline([('scale', StandardScaler()),
                   ('SVC',LinearSVC())])

# Gridsearch to determine the value of C
param_grid_lSVC = {
    'SVC__C':np.arange(0.01,100,10)
}
linearSVC = GridSearchCV(SVCpipe,param_grid_lSVC,cv=inner_cv,return_train_score=True)
linearSVC.fit(X,y)
print(f"The best parameters found are: {linearSVC.best_params_}")
print(f"The mean CV score of the best model is: {linearSVC.best_score_:.3f}")

# This can be used to name the classifier with all the right parameters already in it
bestlinearSVC = linearSVC.best_estimator_
#bestlinearSVC.fit(X_train,y_train)
#bestlinearSVC.coef_ = bestlinearSVC.named_steps['SVC'].coef_
#bestlinearSVC.score(X_train,y_train)

The best parameters found are: {'SVC__C': 10.01}
The mean CV score of the best model is: 0.284


In [None]:
# Neural networks

param_grid_nn = {'solver': ['lbfgs'], 'max_iter': [1000,1100,1200,1300,1400,1500,1600,1700,1800,1900,2000 ], 'alpha': 10.0 ** -np.arange(1, 10), 'hidden_layer_sizes':np.arange(10, 15), 'random_state':[0,1,2,3,4,5,6,7,8,9]}
nn = GridSearchCV(MLPClassifier(), param_grid_nn, n_jobs=-1)
nn.fit(X,y)
print(f"The best parameters found are: {nn.best_params_}")
print(f"The mean CV score of the best model is: {nn.best_score_:.3f}")

# This can be used to name the classifier with all the right parameters already in it
bestnn = nn.best_estimator_

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

In [None]:
for test_size in splits:
    print("\nSplit: Train:{}% Test:{}%".format(100 - (test_size * 100), test_size * 100))
    print('{:<40} {:<20} {:<15}'.format("Classifier", "Accuracy", "Cross validation"))

    # Stratify guarantees that the same proportion of the classes will be available in train and test
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=test_size, stratify=y)

    for cls in classifiers:
        cls[1].fit(x_train, y_train)
        y_pred = cls[1].predict(x_test)
        accuracy = metrics.accuracy_score(y_test, y_pred)
        cross_v = run_cross_validation(cls[1], x_train, y_train)
        print('{:<40} {:<20} {: