In [1]:
# Init libraries
import warnings
import mne
import numpy as np
from sklearn.exceptions import ConvergenceWarning

# Seed NumPy's global random generator.
np.random.seed(23)
# Restrict MNE logging to warnings, so info messages stay out of the terminal.
mne.set_log_level(verbose="warning")
# Ignore these warning categories for the rest of the session.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [2]:
# Project defaults
# The root dir (passed to the dataset loader as its first argument)
root_dir = "./ds003626"

# Sampling rate (presumably in Hz - confirm against the dataset description).
# f_spec_entropy reads this module-level name when it is called.
fs = 256

# Select the useful part of each trial. Time in seconds.
# With fs = 256 the 2 s window is 512 samples, which matches the last axis of
# the loaded data shown further down, (2236, 128, 512).
t_start = 1.5
t_end = 3.5

In [3]:
# Load dataset
from aux.pre_process import get_subjects_data_and_label, get_subjects_data_label_group

condition = "Inner"
# The loader returns three parallel sequences: data, labels and groups.
data, labels, groups = get_subjects_data_label_group(
    root_dir,
    condition,
    t_start=t_start,
    t_end=t_end,
    fs=fs,
)
# Display how many entries each of the three sequences holds.
tuple(map(len, (data, labels, groups)))

(10, 10, 10)

In [4]:
# Join the per-entry arrays end to end along the first axis.
data_array = np.concatenate(data, axis=0)
label_array = np.concatenate(labels, axis=0)
group_array = np.concatenate(groups, axis=0)

In [5]:
tuple(arr.shape for arr in (data_array, label_array, group_array))

((2236, 128, 512), (2236,), (2236,))

In [6]:
from scipy import integrate
# Define all the features
from scipy import stats
import antropy as ant

def mean(x):
    """Return the arithmetic mean of ``x`` taken along its last axis."""
    averaged = np.mean(x, axis=-1)
    return averaged

def std(x):
    """Return the standard deviation of ``x`` along its last axis (NumPy defaults)."""
    spread = np.std(x, axis=-1)
    return spread

def ptp(x):
    """Return the peak-to-peak range of ``x`` along its last axis."""
    peak_to_peak = np.ptp(x, axis=-1)
    return peak_to_peak

def var(x):
    """Return the variance of ``x`` along its last axis (NumPy defaults)."""
    variance = np.var(x, axis=-1)
    return variance

def minim(x):
    """Return the smallest value of ``x`` along its last axis."""
    lowest = np.min(x, axis=-1)
    return lowest

def maxim(x):
    """Return the largest value of ``x`` along its last axis."""
    highest = np.max(x, axis=-1)
    return highest

def argminim(x):
    """Return the index of the smallest value of ``x`` along its last axis."""
    lowest_index = np.argmin(x, axis=-1)
    return lowest_index

def argmaxim(x):
    """Return the index of the largest value of ``x`` along its last axis."""
    highest_index = np.argmax(x, axis=-1)
    return highest_index

def rms(x):
    """Return the root mean square of ``x`` along its last axis."""
    squared = x**2
    mean_square = np.mean(squared, axis=-1)
    return np.sqrt(mean_square)

def abs_diff_signal(x):
    """Return the sum of absolute differences between consecutive samples.

    Differences and the final sum are both taken along the last axis of ``x``.
    """
    steps = np.diff(x, axis=-1)
    step_sizes = np.abs(steps)
    return np.sum(step_sizes, axis=-1)

def skewness(x):
    """Return the skewness of ``x`` along its last axis (``scipy.stats.skew`` defaults)."""
    skew_values = stats.skew(x, axis=-1)
    return skew_values

def kurtosis(x):
    """Return the kurtosis of ``x`` along its last axis (``scipy.stats.kurtosis`` defaults)."""
    kurtosis_values = stats.kurtosis(x, axis=-1)
    return kurtosis_values

def f_minplusmax(x):
    """Return maximum plus minimum of ``x``, both taken along its last axis."""
    highest = np.max(x, axis=-1)
    lowest = np.min(x, axis=-1)
    return highest + lowest

def f_maxminusmin(x):
    """Return maximum minus minimum of ``x``, both taken along its last axis."""
    highest = np.max(x, axis=-1)
    lowest = np.min(x, axis=-1)
    return highest - lowest

def f_spec_entropy(x):
    """Return the normalized Welch spectral entropy of ``x`` along its last axis.

    Computed with ``antropy.spectral_entropy``. The sampling frequency is read
    from the module-level ``fs`` when the function is called, so ``fs`` must
    still hold the sampling rate at that point.
    """
    entropy = ant.spectral_entropy(x, fs, method="welch", normalize=True, axis=-1)
    return entropy

def f_integral(x):
    """Integrate ``x`` along its last axis with Simpson's rule.

    Uses SciPy's default sample spacing. ``scipy.integrate.simps`` was
    deprecated and later removed from SciPy, so ``simpson`` is preferred; the
    old name is only used when ``simpson`` is not available.

    Parameters
    ----------
    x : array_like
        Samples to integrate; the integration runs over the last axis.

    Returns
    -------
    float or numpy.ndarray
        The integral, with the last axis of ``x`` reduced away.
    """
    # Look up the modern name first so no deprecated attribute is touched
    # on SciPy versions that provide both.
    simpson_rule = getattr(integrate, "simpson", None)
    if simpson_rule is None:
        simpson_rule = integrate.simps
    return simpson_rule(x, axis=-1)

def f_petrosian(x):
    """Return ``antropy.petrosian_fd`` of ``x`` evaluated along its last axis."""
    fractal_dim = ant.petrosian_fd(x, axis=-1)
    return fractal_dim

def f_katz(x):
    """Return ``antropy.katz_fd`` of ``x`` evaluated along its last axis."""
    fractal_dim = ant.katz_fd(x, axis=-1)
    return fractal_dim

def concatenate_features(x):
    """Apply every active feature function to ``x`` and join the results.

    Each extractor reduces the last axis of ``x``; the outputs are joined, in
    the order listed, along the last axis of the result.
    """
    # Uncomment the desired line to add the feature
    extractors = (
        mean,
        std,
        ptp,
        var,
        minim,
        maxim,
        argminim,
        argmaxim,
        rms,
        abs_diff_signal,
        skewness,
        kurtosis,
        # f_minplusmax,
        # f_maxminusmin,
        # f_spec_entropy,
        # f_integral,
        # f_katz,
        # f_petrosian,
    )
    return np.concatenate([extract(x) for extract in extractors], axis=-1)

In [7]:
# One concatenated feature vector per entry along the first axis of data_array.
features = [concatenate_features(trial) for trial in data_array]
features_array = np.array(features)

In [8]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier

# Seed handed as random_state to the LinearSVC / SVC estimators built in the
# "regular tests" cell.
random_state = 42
# Test-set fractions; the nested cross-validation cell runs one train/test
# split per entry.
splits = [0.10, 0.20, 0.30]

In [14]:
def run_cross_validation(classifier, x_tr, y_tr):
    """Return the mean accuracy of ``classifier`` over a 5-fold stratified CV.

    ``model_selection`` (from scikit-learn) is looked up when the function is
    called, so it has to be imported before the first call.
    """
    fold_scores = model_selection.cross_val_score(
        classifier,
        x_tr,
        y_tr,
        cv=model_selection.StratifiedKFold(n_splits=5),
        scoring='accuracy',
    )
    return fold_scores.mean()

In [10]:
from sklearn.svm import SVC
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics, model_selection
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

X = features_array
y = label_array
#X = MinMaxScaler().fit_transform(X)
# NOTE(review): the scaler and the selector below are fitted on every sample,
# before any train/test split, so held-out scores computed later from X may be
# optimistic. Confirm whether this is acceptable for the experiment.
X = StandardScaler().fit_transform(X)

# Select one feature selector here.
# The selector gets its own name instead of `fs`: `fs` already holds the
# sampling rate and f_spec_entropy reads it at call time, so rebinding it to an
# estimator would break that feature when the feature cells are run again.
feature_selector = LinearSVC(C=0.01, penalty="l2", dual=False).fit(X, y)

# prefit=True: reuse the estimator fitted above rather than refitting it.
model = SelectFromModel(feature_selector, prefit=True)
X = model.transform(X)

print("New shape: ", X.shape)

New shape:  (2236, 681)


In [11]:
from sklearn.model_selection import StratifiedGroupKFold

# 5-fold StratifiedGroupKFold splitter; passed as `cv` to GridSearchCV in the
# nested cross-validation cell.
inner_cv = StratifiedGroupKFold(n_splits=5)
# NOTE(review): outer_cv is not referenced by any cell visible here - confirm
# whether an outer evaluation loop was intended.
outer_cv = StratifiedGroupKFold(n_splits=5)

In [12]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

# Run Nested cross-validation

# Each entry is [display name, estimator, parameter grid for GridSearchCV].
classifiers = [
    ["Linear SVC", LinearSVC(), {'C': [0.00001, 0.0001, 0.0005], 'dual': (True, False)}],
    ["SVC", SVC(), [{"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100, 1000]}, {"kernel": ["linear"], "C": [1, 10, 100, 1000]}, ]],
]

for test_size in splits:
    print("\nSplit: Train:{}% Test:{}%".format(100 - (test_size * 100), test_size * 100))

    # Stratify guarantees that the same proportion of the classes will be available in train and test
    x_train, x_test, y_train, y_test, g_train, g_test = train_test_split(X, y, group_array, test_size=test_size, stratify=y)

    for cls in classifiers:
        print('{}: {} '.format("Classifier", cls[0]))
        # Hyper-parameter search on the training part only; the group labels
        # are forwarded so the inner splitter can use them.
        clf = GridSearchCV(estimator=cls[1], param_grid=cls[2], cv=inner_cv, n_jobs=-1)
        clf.fit(x_train, y_train, groups=g_train)
        print(f"The best parameters found are: {clf.best_params_}")
        print(f"The mean CV score of the best model is: {clf.best_score_:.3f}")

        print("Grid scores on development set:\n")
        means = clf.cv_results_["mean_test_score"]
        stds = clf.cv_results_["std_test_score"]
        # The loop variables must not be called `mean` / `std`: those names are
        # the module-level feature functions, and rebinding them to floats
        # breaks concatenate_features on any later run.
        for mean_score, std_score, params in zip(means, stds, clf.cv_results_["params"]):
            print("%0.3f (+/-%0.03f) for %r" % (mean_score, std_score * 2, params))

        print("\nDetailed classification report:\n")
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.\n")
        y_true, y_pred = y_test, clf.predict(x_test)
        print(classification_report(y_true, y_pred))
        print()


Split: Train:90.0% Test:10.0%
Classifier: Linear SVC 
The best parameters found are: {'C': 0.0005, 'dual': True}
The mean CV score of the best model is: 0.296
Grid scores on development set:

0.271 (+/-0.024) for {'C': 1e-05, 'dual': True}
0.271 (+/-0.024) for {'C': 1e-05, 'dual': False}
0.278 (+/-0.043) for {'C': 0.0001, 'dual': True}
0.278 (+/-0.043) for {'C': 0.0001, 'dual': False}
0.296 (+/-0.051) for {'C': 0.0005, 'dual': True}
0.296 (+/-0.051) for {'C': 0.0005, 'dual': False}

Detailed classification report:

The model is trained on the full development set.
The scores are computed on the full evaluation set.

              precision    recall  f1-score   support

           0       0.30      0.27      0.28        56
           1       0.31      0.32      0.31        56
           2       0.35      0.38      0.36        56
           3       0.31      0.30      0.31        56

    accuracy                           0.32       224
   macro avg       0.32      0.32      0.32      

In [15]:
# Run the regular tests
# Each entry is [display name, estimator with fixed hyper-parameters].
classifiers = [
    ["Linear SVC", LinearSVC(random_state=random_state, max_iter=10000, C=0.0005)],
    ["SVC", SVC(random_state=random_state, max_iter=10000, C=10, kernel='linear')],
]

cv = StratifiedGroupKFold(n_splits=3)

for train_index, test_index in cv.split(X, y, group_array):
    # A header row is printed for every fold.
    print('{:<40} {:<20} {:<15}'.format("Classifier", "Accuracy", "Cross validation"))

    x_train, y_train = X[train_index], y[train_index]
    x_test, y_test = X[test_index], y[test_index]

    for cls in classifiers:
        clf_name, estimator = cls
        # Held-out accuracy of the estimator fitted on this fold's training part.
        y_pred = estimator.fit(x_train, y_train).predict(x_test)
        accuracy = metrics.accuracy_score(y_test, y_pred)
        # Mean accuracy of an inner cross-validation over the training part.
        cross_v = run_cross_validation(estimator, x_train, y_train)
        print('{:<40} {:<20} {:<15}'.format(clf_name, accuracy, cross_v))

Classifier                               Accuracy             Cross validation
Linear SVC                               0.28295454545454546  0.29573746472758844
SVC                                      0.2943181818181818   0.28095289776427174
Classifier                               Accuracy             Cross validation
Linear SVC                               0.3045977011494253   0.29285714285714287
SVC                                      0.27155172413793105  0.2857142857142857
Classifier                               Accuracy             Cross validation
Linear SVC                               0.3106060606060606   0.2988587502511553
SVC                                      0.3151515151515151   0.3007615029134017
