In [1]:
from support import init_repo

# Start the repository through the project helper imported from `support`.
# NOTE(review): the recorded cell output looks like pip output, so init_repo
# presumably installs dependencies - confirm against support.py.
init_repo()

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
from support import get_subjects_data
# Experiment configuration shared by the cells below
condition = "INNER"  # one of PRONOUNCED, INNER or VISUALIZED
fs = 256  # sampling rate handed to get_subjects_data
random_state = 46  # seed reused by the later cells

# Portion of each trial that is kept, expressed in seconds
t_start = 1.5  # beginning of the window (seconds)
t_end = 3.5  # end of the window (seconds)

# Load the trials, their labels and their group ids for the chosen condition
data_array, label_array, group_array = get_subjects_data(
    condition=condition,
    t_start=t_start,
    t_end=t_end,
    fs=fs,
)
tuple(arr.shape for arr in (data_array, label_array, group_array))

((2236, 128, 512), (2236,), (2236,))

In [3]:
from features import f_mean, f_std, f_ptp, f_var, f_minim, f_maxim, f_argminim, f_argmaxim, f_rms, f_abs_diff_signal, \
    f_skewness, f_kurtosis, generate_features

# Feature functions handed to generate_features, in the order they were imported
func_list = [
    f_mean,
    f_std,
    f_ptp,
    f_var,
    f_minim,
    f_maxim,
    f_argminim,
    f_argmaxim,
    f_rms,
    f_abs_diff_signal,
    f_skewness,
    f_kurtosis,
]

# Build the feature matrix from the loaded trials and display its shape
features_array = generate_features(data_array, func_list)
features_array.shape

(2236, 1536)

In [4]:
def split_train_test(data, labels, groups, size):
    """Split data, labels and groups into train/test parts and standardise the features.

    The split is stratified on ``labels`` so that train and test keep the same
    class proportions. A ``StandardScaler`` is fitted on the training features
    only and then applied to both the training and the test features.

    ``train_test_split``, ``StandardScaler`` and ``random_state`` are looked up
    in the notebook's global namespace when the function is called, so the cell
    that imports/defines them must have been executed first.

    Parameters
    ----------
    data : array-like
        Feature matrix to split.
    labels : array-like
        Class label per sample; also used as the stratification target.
    groups : array-like
        Group id per sample, split alongside ``data`` and ``labels``.
    size : float or int
        Forwarded to ``train_test_split`` as ``test_size``.

    Returns
    -------
    tuple
        ``(x_tr, x_ts, y_tr, y_ts, g_tr, g_ts)`` where the ``x_*`` arrays are
        the scaled features.
    """
    # Stratify on the labels passed in (not on a notebook-global variable) so the
    # function works for any label array the caller provides.
    x_tr, x_ts, y_tr, y_ts, g_tr, g_ts = train_test_split(
        data, labels, groups, test_size=size, stratify=labels, random_state=random_state
    )
    # Fit the scaler on the training data only, then reuse it for the test data
    ss = StandardScaler()
    x_tr = ss.fit_transform(x_tr)
    x_ts = ss.transform(x_ts)
    return x_tr, x_ts, y_tr, y_ts, g_tr, g_ts

In [None]:
# Run nested cross-validation and re-run using the best parameters
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.model_selection import GridSearchCV, train_test_split, StratifiedGroupKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
from support import run_cross_validation, get_feature_selection_model, print_report_nested_cross_validation, print_report_classifier

X = features_array
y = label_array

feature_sm = get_feature_selection_model(X, y)

# Apply the Feature Selection Model without scaling the data.
# NOTE(review): the selection model is built from the full X and y before any
# train/test split; if get_feature_selection_model fits on them, the test data
# influences the selected features - confirm against support.py.
X = feature_sm.transform(X)
n_features_before = np.shape(features_array)
print("Feature transformation - number of features: Before {} - After {}".format(n_features_before[1], np.shape(X)[1]))

# Test-set fractions evaluated for every classifier
splits = [0.10, 0.20, 0.30]

# Run Nested cross-validation
inner_cv = StratifiedGroupKFold(n_splits=5)  # used by the grid search
outer_cv = StratifiedGroupKFold(n_splits=5)  # passed to run_cross_validation

# Each entry: [display name, estimator, parameter grid for GridSearchCV].
# The estimators are seeded at construction so the grid search itself is
# reproducible, not only the final re-fit.
classifiers = [
    [
        "Random Forest",
        RandomForestClassifier(random_state=random_state),
        {
            'n_estimators': [200, 500, 1000, 2000],
            # 'auto' was dropped: for classifiers it is the same as 'sqrt' and
            # it is deprecated/removed in recent scikit-learn releases.
            'max_features': ['sqrt', 'log2'],
            'max_depth': [4, 5, 6, 7, 8],
            'criterion': ['gini', 'entropy'],
        },
    ],
    [
        "Neural Network",
        MLPClassifier(random_state=random_state),
        {
            'solver': ['lbfgs'],
            'max_iter': [200, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000],
            'alpha': 10.0 ** -np.arange(1, 10),
            'hidden_layer_sizes': np.arange(10, 15),
        },
    ],
    [
        "Linear SVC",
        LinearSVC(random_state=random_state),
        {'C': [0.00001, 0.0001, 0.0005, 1, 10, 100, 1000], 'dual': (True, False)},
    ],
    [
        "SVC",
        SVC(random_state=random_state),
        [
            {"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [0.00001, 0.0001, 0.0005, 1, 10, 100, 1000]},
            {"kernel": ["linear"], "C": [0.00001, 0.0001, 0.0005, 1, 10, 100, 1000]},
        ],
    ],
]

for cls in classifiers:
    best_params = []
    best_scores = []

    # Grid search on the training part of every split
    for test_size in splits:
        x_train, x_test, y_train, y_test, g_train, g_test = split_train_test(X, y, group_array, test_size)
        clf = GridSearchCV(estimator=cls[1], param_grid=cls[2], cv=inner_cv, n_jobs=-1)
        clf.fit(x_train, y_train, groups=g_train)

        best_params.append(clf.best_params_)
        best_scores.append(clf.best_score_)

    # Get the best parameter. A new dict is built so that adding random_state
    # does not alter the entry stored in best_params (which is reported below).
    best_param = dict(best_params[np.argmax(best_scores)], random_state=random_state)
    cls[1].set_params(**best_param)

    acc_list = []
    cross_v_list = []
    # Run the same classifier using the best parameters
    for test_size in splits:
        x_train, x_test, y_train, y_test, g_train, g_test = split_train_test(X, y, group_array, test_size)
        cls[1].fit(x_train, y_train)
        y_pred = cls[1].predict(x_test)
        acc_list.append(metrics.accuracy_score(y_test, y_pred))
        cross_v_list.append(run_cross_validation(cls[1], outer_cv, x_train, y_train, g_train))

    print('\n{}: {} '.format("Classifier", cls[0]))
    print_report_nested_cross_validation(splits, best_params, best_scores)
    print_report_classifier(splits, acc_list, cross_v_list)
    # Result of the project's f_std helper applied to the best grid-search scores
    print(f_std(best_scores))

Feature transformation - number of features: Before 1536 - After 681


In [None]:
from sklearn.model_selection import LeavePGroupsOut, cross_val_score

# Per-subject evaluation
X = features_array
y = label_array

feature_sm = get_feature_selection_model(X, y)
cv = StratifiedGroupKFold(n_splits=5)

# Reduce the features with the selection model; no scaling happens at this point
X = feature_sm.transform(X)
n_features_before = np.shape(features_array)
print("Feature transformation - number of features: Before {} - After {}".format(n_features_before[1], np.shape(X)[1]))

# [display name, estimator configured with fixed hyper-parameters]
classifiers = [
    [
        "Random Forest",
        RandomForestClassifier(
            random_state=random_state,
            max_features='log2',
            n_estimators=200,
            max_depth=8,
            criterion='entropy',
        ),
    ],
    [
        "Neural Network",
        MLPClassifier(
            random_state=random_state,
            alpha=1e-09,
            hidden_layer_sizes=10,
            max_iter=1800,
            solver='lbfgs',
        ),
    ],
    ["Linear SVC", LinearSVC(random_state=random_state, max_iter=10000, C=0.0005)],
    ["SVC", SVC(random_state=random_state, max_iter=10000, C=10, kernel='linear')],
]

for clf_name, model in classifiers:
    # Every fold holds nine groups out for testing
    leave_pgo = LeavePGroupsOut(n_groups=9)
    acc_list, cross_v_list, subject_list = [], [], []

    for train_index, test_index in leave_pgo.split(X, y, group_array):
        # The fold is reported under the group id of its first training sample
        subject_list.append(group_array[train_index[0]])

        # Scaler is fitted on the training rows and reused for the test rows
        ss = StandardScaler()
        x_train = ss.fit_transform(X[train_index])
        x_test = ss.transform(X[test_index])
        y_train, y_test = y[train_index], y[test_index]

        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        acc_list.append(metrics.accuracy_score(y_test, y_pred))
        # Cross-validation here is run on the held-out rows and their groups
        cross_v_list.append(run_cross_validation(model, cv, x_test, y_test, group_array[test_index]))

    print('\n{}: {} '.format("Classifier", clf_name))
    for sub, acc, cross_v in zip(subject_list, acc_list, cross_v_list):
        print("Subject: {} - Accuracy {} - Cross validation {}".format(sub, acc, cross_v))

In [None]:
from sklearn.model_selection import LeaveOneGroupOut

# Leave-one-group-out evaluation
X = features_array
y = label_array

feature_sm = get_feature_selection_model(X, y)
cv = StratifiedGroupKFold(n_splits=5)

# Reduce the features with the selection model; no scaling happens at this point
X = feature_sm.transform(X)
n_features_before = np.shape(features_array)
print("Feature transformation - number of features: Before {} - After {}".format(n_features_before[1], np.shape(X)[1]))

# [display name, estimator configured with fixed hyper-parameters]
classifiers = [
    [
        "Random Forest",
        RandomForestClassifier(
            random_state=random_state,
            max_features='log2',
            n_estimators=200,
            max_depth=8,
            criterion='entropy',
        ),
    ],
    [
        "Neural Network",
        MLPClassifier(
            random_state=random_state,
            alpha=1e-09,
            hidden_layer_sizes=10,
            max_iter=1800,
            solver='lbfgs',
        ),
    ],
    ["Linear SVC", LinearSVC(random_state=random_state, max_iter=10000, C=10)],
    ["SVC", SVC(random_state=random_state, max_iter=10000, C=10, kernel='linear')],
]

for clf_name, model in classifiers:
    # Every fold holds exactly one group out for testing
    leave_oo = LeaveOneGroupOut()
    acc_list, cross_v_list, group_list = [], [], []

    for train_index, test_index in leave_oo.split(X, y, group_array):
        # The fold is reported under the group id of its first test sample
        group_list.append(group_array[test_index[0]])

        # Scaler is fitted on the training rows and reused for the test rows
        ss = StandardScaler()
        x_train = ss.fit_transform(X[train_index])
        x_test = ss.transform(X[test_index])
        y_train, y_test = y[train_index], y[test_index]

        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        acc_list.append(metrics.accuracy_score(y_test, y_pred))
        # Cross-validation here is run on the training rows and their groups
        cross_v_list.append(run_cross_validation(model, cv, x_train, y_train, group_array[train_index]))

    print('\n{}: {} '.format("Classifier", clf_name))
    for gp, acc, cross_v in zip(group_list, acc_list, cross_v_list):
        print("Group out: {} - Accuracy {} - Cross validation {}".format(gp, acc, cross_v))