#### Import Packages

In [None]:
## Import packages needed for this script 
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from itertools import combinations
from scipy import stats 

#### Load Data

In [None]:
# load arrays
# The four saved NumPy files are read in the order listed and bound to their names in one step.
data_array, label_array, group_array, epochs_times = (
    np.load(npy_path)
    for npy_path in (
        'Data/data_array.npy',
        'Data/label_array.npy',
        'Data/group_array.npy',
        'Data/epochs_times.npy',
    )
)
# Quick sanity check of the loaded dimensions
print(data_array.shape, label_array.shape, group_array.shape)

#### Feature Selection

In [None]:
## Feature selection of mean channels

In [None]:
# Average every entry of data_array over its last axis
# (presumably the time axis of an epoch -- TODO confirm against how data_array was built).
channels = [np.mean(epoch, axis=-1) for epoch in data_array]
channels_array = np.array(channels)  # X
channels_array.shape

In [None]:
## train set of Means
# Wrap the averaged data in a DataFrame with 65 named columns; the 'label'
# column is appended afterwards, so it is always the last column.
train = pd.DataFrame(
    data=channels_array,
    columns=[
        'AF1', 'AF2', 'AF7', 'AF8', 'AFZ',
        'C1', 'C2', 'C3', 'C4', 'C5', 'C6',
        'CP1', 'CP2', 'CP3', 'CP4', 'CP5', 'CP6', 'CPZ', 'CZ',
        'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8',
        'FC1', 'FC2', 'FC3', 'FC4', 'FC5', 'FC6', 'FCZ',
        'FP1', 'FP2', 'FPZ', 'FT7', 'FT8', 'FZ',
        'O1', 'O2', 'OZ',
        'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8',
        'PO1', 'PO2', 'PO7', 'PO8', 'POZ', 'PZ',
        'T7', 'T8', 'TP7', 'TP8',
        'X', 'Y', 'nd', 'stimulus',
    ],
)
train['label'] = label_array
train.shape


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

In [None]:
def feature_selection(X, y, model):
    """Rank the columns of ``X`` by the importance a fitted model assigns them.

    Parameters
    ----------
    X : pandas.DataFrame
        Predictor table; its column names label the rows of the result.
    y : array-like
        Target values handed to ``model.fit``.
    model : estimator
        Object providing ``fit(x, y)`` and, once fitted, a
        ``feature_importances_`` attribute. It is fitted in place.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Feature'`` and ``'Importance Score'``, ordered from the
        highest score to the lowest.
    """
    # The model is trained on the raw values, without the DataFrame wrapper
    model.fit(X.values, y)
    importance_table = pd.DataFrame(
        {
            'Feature': X.columns,
            'Importance Score': model.feature_importances_,
        }
    )
    return importance_table.sort_values(by='Importance Score', ascending=False)

In [None]:
# Predictors: every column of `train` except its last two
# ('stimulus' and the appended 'label' column).
X = train.iloc[:,:-2]
# Target vector as a plain array
y = train['label'].values

In [None]:
# Feature selection with an Extra Trees classifier (default hyper-parameters).
# NOTE(review): no random_state is passed, so the ranking may change between runs.
feature_selection(X,y, ExtraTreesClassifier())

In [None]:
# Feature selection with a Random Forest classifier (default hyper-parameters).
# NOTE(review): no random_state is passed, so the ranking may change between runs.
feature_selection(X,y, RandomForestClassifier())

In [None]:
## Visualization
# Channels to compare pairwise. Every name must appear only once: a repeated
# entry makes combinations() emit a channel plotted against itself as well as
# duplicate pairs. 'POZ' matches the five-channel selection used for the
# classifiers later in this file.
list_of_columns = ['P1', 'CP2', 'FC2', 'FZ', 'POZ']

# selection of 2 columns at a time
combo_of_columns = list(combinations(list_of_columns, 2))

# One row of class labels per channel pair. The -1 lets the row width follow
# the number of distinct labels instead of assuming exactly two classes.
repeat_labels = np.array(
    np.unique(label_array).tolist() * len(combo_of_columns)
).reshape((len(combo_of_columns), -1))

# One subplot per channel pair, stacked vertically
fig, axes = plt.subplots(len(combo_of_columns))
fig.set_figheight(50)

for idx, (features, cl) in enumerate(zip(combo_of_columns, repeat_labels)):
    for cls in cl:
        # Rows of the training frame that belong to the current class
        class_rows = train['label'] == cls
        axes[idx].scatter(x=train.loc[class_rows, features[0]],
                          y=train.loc[class_rows, features[1]])
    # The title depends only on the channel pair, so it is set once per subplot
    axes[idx].set_title('Plot of {0} against {1}'.format(features[1], features[0]))
plt.show()

#### Setting environment for Classifiers

In [None]:
def mean(x):
    """Arithmetic mean of ``x`` along its last axis."""
    return np.mean(x, axis=-1)


def std(x):
    """Standard deviation of ``x`` along its last axis."""
    return np.std(x, axis=-1)


def ptp(x):
    """Peak-to-peak range (maximum minus minimum) of ``x`` along its last axis."""
    return np.ptp(x, axis=-1)


def var(x):
    """Variance of ``x`` along its last axis."""
    return np.var(x, axis=-1)


def minim(x):
    """Smallest value of ``x`` along its last axis."""
    return np.min(x, axis=-1)


def maxim(x):
    """Largest value of ``x`` along its last axis."""
    return np.max(x, axis=-1)


def argminim(x):
    """Position, along the last axis, at which ``x`` reaches its minimum."""
    return np.argmin(x, axis=-1)


def argmaxim(x):
    """Position, along the last axis, at which ``x`` reaches its maximum."""
    return np.argmax(x, axis=-1)


def rms(x):
    """Root mean square of ``x`` along its last axis."""
    mean_of_squares = np.mean(x**2, axis=-1)
    return np.sqrt(mean_of_squares)


def abs_diff_signal(x):
    """Sum of absolute differences between neighbouring values on the last axis."""
    step_sizes = np.abs(np.diff(x, axis=-1))
    return np.sum(step_sizes, axis=-1)


def skewness(x):
    """Sample skewness of ``x`` along its last axis (``scipy.stats.skew``)."""
    return stats.skew(x, axis=-1)


def kurtosis(x):
    """Kurtosis of ``x`` along its last axis (``scipy.stats.kurtosis`` defaults)."""
    return stats.kurtosis(x, axis=-1)


def concatenate_features(x):
    """Compute all twelve summary statistics of ``x`` and join them end to end.

    Each statistic collapses the last axis of ``x``; the results are then
    concatenated along their own last axis in this fixed order: mean, std,
    ptp, var, min, max, argmin, argmax, rms, absolute-difference sum,
    skewness, kurtosis.
    """
    extractors = (mean, std, ptp, var, minim, maxim,
                  argminim, argmaxim, rms, abs_diff_signal,
                  skewness, kurtosis)
    return np.concatenate([extract(x) for extract in extractors], axis=-1)

In [None]:
# One concatenated feature vector per entry of data_array
features = [concatenate_features(epoch) for epoch in data_array]

In [None]:
# Stack the per-entry feature vectors into a single array (the X used by the grid search below)
features_array=np.array(features) #X
features_array.shape

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from mne.decoding import SlidingEstimator, cross_val_multiscore

In [None]:
# Scorer names evaluated for every classifier.
# NOTE(review): 'precision', 'recall', 'f1' and 'roc_auc' use scikit-learn's
# binary defaults, so this list presumably assumes a two-class label array.
scoring_method_list = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']


# Creates pipeline for each classifier and returns a dictionary with metrics used for analysis
def model_metric(classifier, X, y, cv=3, groups=None):
    """Cross-validate ``classifier`` with a sliding estimator for every scorer.

    The classifier is placed behind a ``StandardScaler`` in a pipeline, wrapped
    in an MNE ``SlidingEstimator`` and scored with ``cross_val_multiscore``
    once per entry of ``scoring_method_list``.

    Parameters
    ----------
    classifier : estimator
        Scikit-learn compatible classifier.
    X : array-like
        Data passed unchanged to ``cross_val_multiscore``.
    y : array-like
        Labels passed unchanged to ``cross_val_multiscore``.
    cv : int or cross-validation splitter, default 3
        Cross-validation strategy. The default keeps the previous behaviour.
    groups : array-like or None, default None
        Group label of each sample. It only influences the split when ``cv``
        is a group-aware splitter such as ``GroupKFold``; with an integer
        ``cv`` the groups are ignored by the splitter.

    Returns
    -------
    dict
        Maps each scorer name to its scores averaged over the folds
        (``np.mean(scores, axis=0)``).
    """
    clf = make_pipeline(StandardScaler(), classifier)

    scoring_dictionary = {}
    for scoring_method in scoring_method_list:
        time_decod = SlidingEstimator(clf, n_jobs=1, scoring=scoring_method, verbose=True)
        scores = cross_val_multiscore(time_decod, X, y, groups=groups, cv=cv, n_jobs=1)
        # Axis 0 of `scores` indexes the folds; averaging it leaves one value per slice
        scoring_dictionary[scoring_method] = np.mean(scores, axis=0)

    return scoring_dictionary


# Flattens a dictionary that maps each classifier name to its dictionary of metrics into a single-level dictionary suitable for building a DataFrame

def unpack_results(classifier_results_dictionary):
    """Flatten a two-level results dictionary into a single level.

    Parameters
    ----------
    classifier_results_dictionary : dict
        Maps a classifier name to a dictionary of ``{scoring method: value}``.

    Returns
    -------
    dict
        One entry per (classifier, scoring method) pair, keyed by
        ``'<classifier name> <scoring method>'`` and kept in the original
        iteration order.
    """
    return {
        '{} {}'.format(classifier_name, scoring_method): metric_values
        for classifier_name, metrics in classifier_results_dictionary.items()
        for scoring_method, metric_values in metrics.items()
    }

In [None]:
# Pass all data through the classifiers - raw test
features_array.shape

In [None]:
# Classifiers to compare, paired with the display names used as result keys
classifier_list = [LogisticRegression(solver='liblinear'), RandomForestClassifier(), SVC(kernel='linear', C=1.0)]
classifier_name_list = ['Logistic Regression', 'Random Forest', 'SVC']
classifier_zip_list = zip(classifier_list, classifier_name_list)

# classifier name -> {scoring method: scores}, filled in by the loop below
classifier_results_dictionary = {}

for classifier, classifier_name in classifier_zip_list:
    # Evaluate on the full data_array. A failing classifier is reported and
    # skipped so the remaining ones still run; the failure message is printed
    # only when an error actually occurred.
    try:
        classifier_results_dictionary[classifier_name] = model_metric(classifier, data_array, label_array)
    except Exception as error:
        print('Classifier {} has failed!!!'.format(classifier_name))
        print('    reason: {!r}'.format(error))
    else:
        print('Classifier {} is complete!'.format(classifier_name))

In [None]:
# Flatten the all-data results into one {'<classifier> <metric>': scores} dictionary.
# NOTE(review): this name is reassigned by the five-channel run further down
# before any DataFrame is built from it, so these results are never summarised -- confirm intent.
unpacked_classifier_results = unpack_results (classifier_results_dictionary)

In [None]:
# Passed data through - select channels

In [None]:
# Names used to locate columns along axis 1 of data_array
# (presumably in the same order as the recorded channels -- TODO confirm).
channel_list = [
    'AF1', 'AF2', 'AF7', 'AF8', 'AFZ',
    'C1', 'C2', 'C3', 'C4', 'C5', 'C6',
    'CP1', 'CP2', 'CP3', 'CP4', 'CP5', 'CP6', 'CPZ', 'CZ',
    'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8',
    'FC1', 'FC2', 'FC3', 'FC4', 'FC5', 'FC6', 'FCZ',
    'FP1', 'FP2', 'FPZ', 'FT7', 'FT8', 'FZ',
    'O1', 'O2', 'OZ',
    'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8',
    'PO1', 'PO2', 'PO7', 'PO8', 'POZ', 'PZ',
    'T7', 'T8', 'TP7', 'TP8',
    'X', 'Y', 'nd', 'stimulus',
]
# The five channels kept for the reduced classification run
list_of_columns = ['P1', 'CP2', 'FC2', 'FZ', 'POZ']

# Positions of the kept channels, in channel_list order (not list_of_columns order)
col_loc = [
    position
    for position, channel_name in enumerate(channel_list)
    if channel_name in list_of_columns
]

# Keep only the selected positions along axis 1
X = data_array[:, col_loc]


In [None]:
# Classifiers to compare, paired with the display names used as result keys
classifier_list = [LogisticRegression(solver='liblinear'), RandomForestClassifier(), SVC(kernel='linear', C=1.0)]
classifier_name_list = ['Logistic Regression', 'Random Forest', 'SVC']
classifier_zip_list = zip(classifier_list, classifier_name_list)

# classifier name -> {scoring method: scores}, filled in by the loop below
classifier_results_dictionary = {}

for classifier, classifier_name in classifier_zip_list:
    # Evaluate on the channel subset X. Only ordinary errors are caught, so a
    # keyboard interrupt still stops the run, and the reason for a failure is
    # printed instead of being silently discarded.
    try:
        classifier_results_dictionary[classifier_name] = model_metric(classifier, X, label_array)
    except Exception as error:
        print('Classifier {} has failed!!!'.format(classifier_name))
        print('    reason: {!r}'.format(error))
    else:
        print('Classifier {} is complete!'.format(classifier_name))

In [None]:
# Unpacking data for 5 channels: flatten the nested results and tabulate them,
# one column per '<classifier> <metric>' key.
# NOTE(review): classifier_results_dictionary was last filled by the run on the
# channel subset X, so despite its name `all_channels_metrics` presumably holds
# the five-channel metrics -- confirm and consider renaming.

unpacked_classifier_results = unpack_results (classifier_results_dictionary)
all_channels_metrics = pd.DataFrame.from_dict (unpacked_classifier_results)



#### Metrics Results

In [None]:
#Unpacking data for 5 channels
all_channels_metrics.describe()

In [None]:
#all_channels_metrics = pd.DataFrame.from_dict (unpacked_classifier_results)
all_channels_metrics.describe()

#### Hyperparameter optimization

In [None]:
### Random forest using all channels
# Candidate settings; the 'clf__' prefix addresses the pipeline step named 'clf'
param_grid = {
    'clf__max_features': ['sqrt', 'log2', None],
    'clf__min_samples_leaf': [1, 2, 3],
    'clf__max_depth': [None],
}

# Scaling followed by the random forest being tuned
clf = RandomForestClassifier()
pipe = Pipeline(steps=[('scaler', StandardScaler()), ('clf', clf)])

# Five folds split by group_array (passed to fit below)
gkf = GroupKFold(n_splits=5)

gscv = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=gkf, n_jobs=12)
gscv.fit(features_array, label_array, groups=group_array)

In [None]:
gscv.best_estimator_

In [None]:
# Tabulate the nested results dictionary: one column per classifier, one row per scoring method.
# NOTE(review): this reads classifier_results_dictionary, which is not produced by the
# grid search above, so the table presumably does not reflect the tuned model -- confirm intent.
all_channels_after_hyper = pd.DataFrame.from_dict(classifier_results_dictionary)