In [None]:
#Inherited from https://www.kaggle.com/j105sahil/eeg-brainwave-dataset-mental-state

from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import shutil

import xgboost as xgb

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# later cells, so silent NaN/inf results will not be reported.
warnings.filterwarnings('ignore')

# import os
# os.listdir('../input')

In [None]:
def _format_three_decimals(value):
    """Render a number with exactly three digits after the decimal point."""
    return '%.3f' % value


# Use the fixed three-decimal rendering whenever pandas displays a float.
pd.set_option('display.float_format', _format_three_decimals)

In [None]:
# #Script to convert from mean centered to 0 - 1600 uv
# data = res[1:]
# data = 0.48828125 * (np.array(data))
# 1680 * ((data) - data.min()) / (data.max() - data.min())

In [None]:
# Start from an empty output folder: remove the one left by a previous run
# (if any), then create it again.
try:
    shutil.rmtree("dataset/transformed_data_main/")
except FileNotFoundError:
    print("folder dataset/transformed_data_main/ does not exist")
else:
    print("deleted dataset/transformed_data_main/")

print("creating dataset/transformed_data_main/")
os.mkdir("dataset/transformed_data_main/")

In [None]:
#Inherited from https://github.com/jordan-bird/eeg-feature-generation

# Rescale each electrode channel of every CSV recording found in
# dataset/original_data_main to the 0 - 1680 range (min-max scaling, done
# separately per file and per channel) and write the result under the same
# file name into dataset/transformed_data_main/.
# See also: https://github.com/alexandrebarachant/muse-lsl/issues/11


# sorted() only makes the processing/log order deterministic; every file is
# still handled independently of the others.
for x in sorted(os.listdir("dataset/original_data_main")):

    # Ignore non-CSV files
    if not x.lower().endswith('.csv'):
        continue
    print("processing ", x)
    df = pd.read_csv("dataset/original_data_main/"+x)
    for electrode in ["TP9", "TP10", "AF7", "AF8", "Right AUX"]:
        # NOTE(review): 0.48828125 looks like a raw-count-to-microvolt factor -- confirm.
        # A positive constant factor cancels out in the min-max scaling below
        # (up to floating-point rounding).
        data = 0.48828125 * np.asarray(df[electrode])
        lowest = data.min()
        value_range = data.max() - lowest
        if value_range == 0:
            # A flat channel would divide by zero and be written out as NaN
            # (silently, because warnings are suppressed in this notebook).
            # Every sample equals the minimum, which the formula maps to 0.
            data = np.zeros_like(data, dtype=float)
        else:
            data = 1680 * (data - lowest) / value_range
        df[electrode] = data
    df.to_csv("dataset/transformed_data_main/"+x, index=False)

In [None]:
# gen_training_matrix comes from a local module that is not visible here;
# presumably it builds the feature matrix from the recordings (see the print
# below) -- confirm against EEG_generate_training_matrix.
from EEG_generate_training_matrix import gen_training_matrix

# Folder that the rescaling cell writes its CSV files into.
data_directory_path = "dataset/transformed_data_main/"
# File name handed to gen_training_matrix; the next cell reads it with pd.read_csv.
preprocessed_data_file_name = "out_main.csv"

print("generating training matrix...")
# NOTE(review): the meaning of cols_to_ignore = -1 is defined inside
# gen_training_matrix -- presumably it skips the last column of each recording; confirm.
gen_training_matrix(data_directory_path, preprocessed_data_file_name, cols_to_ignore = -1)

In [None]:
# Read the generated feature matrix; nRowsRead = None puts no limit on the
# number of rows, so the whole file is loaded.
nRowsRead = None
df = pd.read_csv(preprocessed_data_file_name, sep=',', nrows=nRowsRead)
# Keep the source file name on the frame object for reference.
df.dataframeName = preprocessed_data_file_name
nRow = df.shape[0]
nCol = df.shape[1]
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
# Parse every cell with complex() and then cast the whole frame to float.
# NOTE(review): presumably some feature columns were written as complex-number
# strings -- confirm. The complex -> float cast is expected to keep only the
# real part.
df = df.applymap(complex).astype(float)


In [None]:
from sklearn import ensemble

# Random forest with 100 trees, each limited to depth 20.
# random_state pins the forest's internal randomness so that re-running the
# notebook on the same training rows yields the same model and scores.
model = ensemble.RandomForestClassifier(n_estimators=100, max_depth=20, random_state=42)

In [None]:
# Random row split: each row goes to the training set with probability 0.7,
# otherwise to the test set. The generator is seeded so the split (and every
# score computed from it) is the same after a kernel restart.
rng = np.random.default_rng(42)
msk = rng.random(len(df)) < 0.7

train = df[msk]
test = df[~msk]

# The "Label" column is the target; every other column is a feature.
y_train = train["Label"]
y_test = test["Label"]

X_train = train.drop("Label", axis=1)
X_test = test.drop("Label", axis=1)

print("training model...")
model.fit(X_train, y_train)

In [None]:
# Predicted label for every row of the held-out test features.
y_pred = model.predict(X_test)

In [None]:
from sklearn import metrics

# Fraction of test rows predicted correctly, and the number predicted wrongly.
accuracy = metrics.accuracy_score(y_test, y_pred)
count_misclassified = (y_test != y_pred).sum()

print('Misclassified samples: {}'.format(count_misclassified))
print('Accuracy: {:.2f}'.format(accuracy))

In [None]:
from sklearn.metrics import confusion_matrix

y_true = y_test
matrix = confusion_matrix(y_true, y_pred)
# Diagonal entry divided by its row total: the share of each true class that
# was predicted correctly (displayed as the cell output).
np.diagonal(matrix) / np.sum(matrix, axis=1)

In [None]:
# Per-class score: confusion-matrix diagonal divided by the matching row sum.
classwise_accuracy = matrix.diagonal()/matrix.sum(axis=1)

In [None]:
activities = ['relaxedeyesopen', 'lostinmigration']

# One line per activity: its name followed by the per-class score stored at
# the same position.
# NOTE(review): assumes the order of `activities` matches the class order of
# the confusion matrix -- confirm.
for index in range(len(activities)):
    activity = activities[index]
    print(activity, classwise_accuracy[index])

In [None]:
from joblib import dump, load
# Persist the fitted model to model.joblib in the current working directory.
dump(model, 'model.joblib') 
print("model training done.")

In [None]:
# Rebind `model` to the object stored in model.joblib.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
model = load('model.joblib') 

In [None]:
# Recompute the test-set predictions with whatever `model` is currently bound to.
y_pred = model.predict(X_test)

### Testing on muse monitor converted data

In [None]:
# Read the whole out_testing.csv feature matrix (nRowsRead = None means no row limit).
# NOTE(review): this rebinds `df` and `preprocessed_data_file_name`, so later
# cells that use `df` see this file's rows, not those of out_main.csv.
nRowsRead = None
preprocessed_data_file_name = "out_testing.csv"
df = pd.read_csv(preprocessed_data_file_name, sep=',', nrows=nRowsRead)
# Keep the source file name on the frame object for reference.
df.dataframeName = preprocessed_data_file_name
nRow = df.shape[0]
nCol = df.shape[1]
print(f'There are {nRow} rows and {nCol} columns')
df

In [None]:
# Feature columns of the out_testing.csv frame (everything except "Label").
X_muse_test = df.drop("Label", axis=1)

In [None]:
# Class-probability estimates for every row of X_muse_test (one column per class).
y_muse_pred = model.predict_proba(X_muse_test)

In [None]:
# Author's label for this run: "debbie league part 1".
# NOTE(review): presumably names the recording behind out_testing.csv when the
# cell was executed -- confirm.
# Sum of the predicted class probabilities over all rows (one total per class).
np.sum(y_muse_pred, axis=0)

In [None]:
# Author's label for this run: "eyes closed".
# NOTE(review): presumably names the recording behind out_testing.csv when the
# cell was executed -- confirm.
# Sum of the predicted class probabilities over all rows (one total per class).
np.sum(y_muse_pred, axis=0)

In [None]:
# Author's label for this run: "eyes open 250hz".
# NOTE(review): presumably names the recording behind out_testing.csv when the
# cell was executed -- confirm.
# Sum of the predicted class probabilities over all rows (one total per class).
np.sum(y_muse_pred, axis=0)

In [None]:
# Author's label for this run: "eyes open relaxed".
# NOTE(review): presumably names the recording behind out_testing.csv when the
# cell was executed -- confirm.
# Sum of the predicted class probabilities over all rows (one total per class).
np.sum(y_muse_pred, axis=0)

In [None]:
# Author's label for this run: "league part 2".
# NOTE(review): presumably names the recording behind out_testing.csv when the
# cell was executed -- confirm.
# Sum of the predicted class probabilities over all rows (one total per class).
np.sum(y_muse_pred, axis=0)

In [None]:
# Count how often each class is predicted.
# y_muse_pred holds class probabilities (one column per class), so taking
# np.unique of it directly would tally distinct probability values instead of
# classes. Convert each row to its most probable class label first; columns
# of predict_proba follow the order of model.classes_.
predicted_labels = model.classes_[np.argmax(y_muse_pred, axis=1)]
(unique, counts) = np.unique(predicted_labels, return_counts=True)
# One row per predicted class: [label, number of rows predicted as that label].
frequencies = np.asarray((unique, counts)).T

In [None]:
# Author's label for this run: "eyes closed" (presumably the recording behind
# out_testing.csv when the cell was executed -- confirm).
# Displays the (value, count) table built in the previous cell.
frequencies

In [None]:
# Author's label for this run: "eyes open relaxed" (presumably the recording
# behind out_testing.csv when the cell was executed -- confirm).
# Displays the (value, count) table held in `frequencies`.
frequencies

In [None]:
# Author's label for this run: "league part 1" (presumably the recording
# behind out_testing.csv when the cell was executed -- confirm).
# Displays the (value, count) table held in `frequencies`.
frequencies

In [None]:
# Author's label for this run: "league part 2" (presumably the recording
# behind out_testing.csv when the cell was executed -- confirm).
# Displays the (value, count) table held in `frequencies`.
frequencies

## CNN and statistical features

### Paper: Classification of EEG Signals Based on Image Representation of Statistical Features
#### Link: https://link.springer.com/chapter/10.1007/978-3-030-29933-0_37

In [None]:
# Per-feature importance scores of the currently bound (fitted) model.
importances = model.feature_importances_

In [None]:
# Positions of the 729 most important features, most important first.
# np.argsort sorts ascending, so the order is reversed before slicing;
# slicing the ascending order would keep the least important features.
# 729 = 27 * 27, the size of the square image each row is reshaped into later.
ranked = np.argsort(importances)[::-1][:729]

In [None]:
# Keep only the feature columns selected in `ranked`, plus the label column.
# Positions in `ranked` refer to the feature columns as the forest saw them
# (the frame with "Label" dropped), so "Label" is dropped by name here rather
# than assuming it is the last column.
# NOTE(review): at this point `df` was last bound by the cell that reads
# out_testing.csv, not out_main.csv -- confirm this is the intended data.
feature_columns = df.columns.drop("Label")[ranked]
# assign() returns a new frame, which avoids writing a column onto a selection
# of `df` (the pattern behind pandas' SettingWithCopyWarning).
feature_imp_df = df[feature_columns].assign(Label=df["Label"])

In [None]:
from sklearn import ensemble

# Smaller random forest (15 trees, depth 4) used on the selected features.
# random_state pins the forest's internal randomness so that re-running the
# notebook on the same training rows yields the same model and scores.
model = ensemble.RandomForestClassifier(n_estimators=15, max_depth=4, random_state=42)

In [None]:
# From here on `df` is the reduced frame (selected features + "Label").
df = feature_imp_df

# Random row split: each row goes to the training set with probability 0.7,
# otherwise to the test set. The generator is seeded so the split (and every
# score computed from it) is the same after a kernel restart.
rng = np.random.default_rng(42)
msk = rng.random(len(df)) < 0.7

train = df[msk]
test = df[~msk]

# The "Label" column is the target; every other column is a feature.
y_train = train["Label"]
y_test = test["Label"]

X_train = train.drop("Label", axis=1)
X_test = test.drop("Label", axis=1)

model.fit(X_train, y_train)

In [None]:
# Predicted label for every row of the reduced test features.
y_pred = model.predict(X_test)

In [None]:
from sklearn import metrics

# Fraction of test rows predicted correctly, and the number predicted wrongly.
accuracy = metrics.accuracy_score(y_test, y_pred)
count_misclassified = (y_test != y_pred).sum()

print('Misclassified samples: {}'.format(count_misclassified))
print('Accuracy: {:.2f}'.format(accuracy))

In [None]:
# (rows, columns) of the test features; the reshape below needs 27 * 27 = 729 columns.
X_test.shape

In [None]:
# Plain NumPy copies of the train/test features and labels for the network.
X_train_np, y_train_np = X_train.to_numpy(), y_train.to_numpy()
X_test_np, y_test_np = X_test.to_numpy(), y_test.to_numpy()

In [None]:
# Turn every row into a 27 x 27 single-channel image: (rows, 27, 27, 1).
X_train_np = np.reshape(X_train_np, (X_train_np.shape[0], 27, 27, 1))
X_test_np = np.reshape(X_test_np, (X_test_np.shape[0], 27, 27, 1))

In [None]:
# Sanity check of the reshaped training array; expected form is (rows, 27, 27, 1).
X_train_np.shape

In [None]:
import tensorflow as tf

from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, Dropout, Flatten
import matplotlib.pyplot as plt

In [None]:
# Small convolutional network over 27 x 27 single-channel inputs:
# three 3x3 convolutions (the first two each followed by 2x2 max pooling),
# then a 64-unit dense layer and a 3-unit output layer.
# NOTE(review): the output has 3 units while the `activities` list printed
# earlier names two classes -- confirm the number of label classes.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(27, 27, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(3),  # no activation: raw scores, matched by from_logits=True in the loss
])

In [None]:
# Integer labels + raw output scores -> sparse categorical cross-entropy on logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Train for 20 epochs.
# NOTE(review): the test split doubles as validation data here, so the
# validation metrics are measured on the same rows later passed to evaluate().
history = model.fit(
    X_train_np,
    y_train_np,
    epochs=20,
    validation_data=(X_test_np, y_test_np),
)

In [None]:
# Loss and accuracy of the trained network on the test split.
test_loss, test_acc = model.evaluate(X_test_np,  y_test_np, verbose=2)

In [None]:
# Index of the highest-scoring output unit for every test row.
model_preds = np.argmax(model.predict(X_test_np), axis=1)

In [None]:
# Number of test rows whose predicted index equals the stored label.
correct_preds = np.sum(model_preds == y_test_np)
correct_preds