In [1]:
import numpy as np
import os
import pyedflib
import json
import sklearn
import matplotlib
import mne
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from sklearn import neighbors
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Build the dataset: for every usable EDF recording in ./data, band-limit the
# signal, locate the "answer" annotations and cut short overlapping windows
# right after each answer. Produces:
#   data  -- float array of shape (n_windows, N_CHANNELS, WINDOW_LEN)
#   marks -- list with one label per window (1 = right answer, 0 = wrong answer)

N_CHANNELS = 8        # channel count every recording is expected to have
WINDOW_LEN = 100      # samples per extracted window
N_SHIFTS = 10         # windows per answer, each shifted by one more sample
EXCLUDE_NOISY = False # set True to also skip files whose path contains "noisy"

# Annotation descriptions that count as a right / wrong answer. Recordings use
# different spellings, so every known variant is listed.
RIGHT_LABELS = ("right answer", "right_answer")
WRONG_LABELS = ("fake wrong answer", "wrong answer, shown",
                "wrong answer", "wrong_answer")

marks = []
per_file_windows = []

# The recordings live in a "data" folder inside the current working directory.
data_dir = Path.cwd() / "data"
print(data_dir)

# sorted(): iterdir() yields entries in arbitrary order; sorting keeps the
# sample order (and therefore the later train/test split) reproducible.
for file in sorted(data_dir.iterdir()):
    if file.suffix != ".edf":
        continue
    # exclude spoiled files
    if "spoiled" in str(file):
        continue
    # optionally exclude noisy files
    if EXCLUDE_NOISY and "noisy" in str(file):
        continue

    loaded_data = mne.io.read_raw_edf(str(file))
    loaded_data.load_data()

    # events: one row per annotation; column 0 is the sample index and
    # column 2 the integer code. event_id maps description -> code.
    events, event_id = mne.events_from_annotations(loaded_data)
    print(event_id)

    # Codes of the answer annotations present in this file. Right and wrong
    # codes are kept apart so the label never depends on list position.
    right_codes = [event_id[name] for name in RIGHT_LABELS if name in event_id]
    wrong_codes = [event_id[name] for name in WRONG_LABELS if name in event_id]
    event_names = right_codes + wrong_codes
    print(event_names)

    # add filters: 40 Hz low-pass followed by 2 Hz high-pass (IIR), applied
    # to a copy so loaded_data itself stays unfiltered
    filtered_data = loaded_data.copy().filter(l_freq=None, h_freq=40., fir_design='firwin', method='iir', iir_params=None)
    filtered_data = filtered_data.filter(l_freq=2., h_freq=None, fir_design='firwin', method='iir', iir_params=None)
    raw_data = filtered_data.get_data()

    if raw_data.shape[0] != N_CHANNELS:
        raise ValueError(
            f"{file.name}: expected {N_CHANNELS} channels, got {raw_data.shape[0]}"
        )

    # Find where to stop: `num` ends up as the index of the last answer event,
    # or of the first answer event that shares its sample index with the event
    # right after it. Events at index >= num are not used below.
    n_events = events.shape[0]
    num = 0
    for i in range(n_events):
        if events[i, 2] in event_names:
            num = i
            # i + 1 is bounds-checked: the last event may itself be an answer
            if i + 1 < n_events and events[i, 0] == events[i + 1, 0]:
                break

    # separate answers: N_SHIFTS windows of WINDOW_LEN samples per answer
    n_samples = raw_data.shape[1]
    file_windows = []
    for i in range(num):
        code = events[i, 2]
        if code not in event_names:
            continue
        onset = events[i, 0]
        # Skip answers too close to the end of the recording for all shifted
        # windows to be full length (they could not be stacked otherwise).
        if onset + WINDOW_LEN + N_SHIFTS - 1 > n_samples:
            continue
        label = 1 if code in right_codes else 0
        for j in range(N_SHIFTS):
            file_windows.append(raw_data[:, onset + j:onset + j + WINDOW_LEN])
            marks.append(label)

    # Stack once per file instead of growing an array window by window.
    if file_windows:
        per_file_windows.append(np.stack(file_windows, axis=0))

if per_file_windows:
    data = np.concatenate(per_file_windows, axis=0)
else:
    data = np.zeros((0, N_CHANNELS, WINDOW_LEN))
data.shape

C:\Users\Ilya\Documents\GitHub\EEG_feedback\EEG_preprocessing\data
Extracting EDF parameters from C:\Users\Ilya\Documents\GitHub\EEG_feedback\EEG_preprocessing\data\eeg-03-05-23_16-35 (9, noisy).edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 205624  =      0.000 ...  1644.992 secs...
Used Annotations descriptions: ['end task', 'right_answer', 'start task', 'wrong_answer']
{'end task': 1, 'right_answer': 2, 'start task': 3, 'wrong_answer': 4}
[2, 4]
Filtering raw data in 1 contiguous segment
Setting up low-pass filter at 40 Hz

IIR filter parameters
---------------------
Butterworth lowpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 8 (effective, after forward-backward)
- Cutoff at 40.00 Hz: -6.02 dB

Filtering raw data in 1 contiguous segment
Setting up high-pass filter at 2 Hz

IIR filter parameters
---------------------
Butterworth highpass zero-phase (two-pass forward and reverse) non-causal fi

(500, 8, 100)

In [107]:
# FFT experiments: complex spectrum of every window, taken along the last axis.
# np.fft.fft does not modify its input, so `data` is passed directly.
# NOTE(review): the saved output below this cell shows 51 frequency bins, which
# does not match a full FFT of 100-sample windows -- presumably left over from
# an earlier run; re-execute to refresh it.
fft_data = np.fft.fft(data, axis=-1)
fft_data.shape

(50, 8, 51)

In [24]:
# Feature array: an independent copy of `data`, so reshaping/scaling X later
# leaves the extracted windows untouched.
X = np.array(data, copy=True)
X.shape

(500, 8, 100)

In [25]:
# Target vector: one label per window, built from the `marks` list.
Y = np.asarray(marks)
Y.shape

(500,)

In [26]:
# Class balance check: number of windows labelled 1.
ones = marks.count(1)
ones

230

In [27]:
# split data
# Flatten every window into a single feature vector. Using -1 for the second
# dimension keeps this cell re-runnable: once X is already 2-D the reshape is a
# no-op instead of failing on the missing third axis.
X = X.reshape(X.shape[0], -1)
# Normalizer rescales each sample (row) on its own, so applying it before the
# split does not carry information between samples.
scaler = Normalizer().fit(X)
X = scaler.transform(X)
# NOTE(review): the loading cell emits 10 windows per answer, each shifted by a
# single sample. A purely random split very likely puts near-identical windows
# of the same answer into both train and test, which would inflate the
# hold-out score (compare it with the much lower cross-validation scores
# below). Consider a group-aware split keyed on the source answer -- TODO.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(375, 800) (125, 800) (375,) (125,)


In [19]:
#normalize data
# Fit the scaler on the training split only and reuse it for the test split;
# a preprocessing step should never be fitted on held-out data. Normalizer
# rescales each row independently, so the resulting values are the same as
# with a separately fitted scaler.
scaler = Normalizer().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
x_test

array([[-0.00833475, -0.01926104,  0.00072463, ..., -0.00981693,
        -0.02571589,  0.00150965],
       [-0.015917  ,  0.00060424,  0.00297742, ...,  0.0474895 ,
         0.04700257, -0.02442544],
       [-0.07483494,  0.00092324,  0.04789551, ...,  0.03770665,
        -0.03139716, -0.06624978],
       ...,
       [ 0.02138294, -0.02924562, -0.04905707, ...,  0.05212735,
         0.0576762 ,  0.03348965],
       [ 0.0093262 , -0.00714642, -0.01256898, ..., -0.01932318,
        -0.00502299, -0.02061617],
       [-0.04023482, -0.00025853, -0.01572747, ...,  0.01197182,
         0.0266259 , -0.0339813 ]])

In [28]:
# Random forest: fit on the training split, then print the hold-out accuracy,
# the 4-fold cross-validation scores on the full set, the true and predicted
# test labels, and a per-class report.
model = RandomForestClassifier(max_depth=None, random_state=0)
model.fit(x_train, y_train)
# Predict once and reuse the result for both the accuracy and the report.
pre_Y = model.predict(x_test)
print(accuracy_score(y_test, pre_Y))
cv_results = cross_validate(model, X, Y, cv=4)
print(cv_results['test_score'])
print("true: ", y_test)
print("pred: ", pre_Y)
print("\n", classification_report(y_test, pre_Y))

0.856
[0.464 0.304 0.576 0.48 ]
true:  [1 0 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1 0 1 0 1 1 0 0 1
 0 1 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 1 0
 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 1 1 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1
 0 1 0 0 0 0 1 1 0 0 1 1 0 0]
pred:  [1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1 0 1 1 0 1 0 0 0 1 0 1 0 0 0 1 1 0 0 1
 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 1 0 0 1 1 0 1 1 0
 0 0 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 0 1
 0 1 0 0 0 0 1 1 0 0 0 1 0 0]

               precision    recall  f1-score   support

           0       0.82      0.94      0.88        70
           1       0.91      0.75      0.82        55

    accuracy                           0.86       125
   macro avg       0.87      0.84      0.85       125
weighted avg       0.86      0.86      0.85       125



In [29]:
# Linear support-vector classifier: fit on the training split, then print the
# hold-out accuracy, the 4-fold cross-validation scores on the full set, the
# true and predicted test labels, and a per-class report.
model = svm.SVC(kernel='linear', C=1)
model.fit(x_train, y_train)
# Predict once and reuse the result for both the accuracy and the report.
pre_Y = model.predict(x_test)
print(accuracy_score(y_test, pre_Y))
cv_results = cross_validate(model, X, Y, cv=4)
print(cv_results['test_score'])
print("true: ", y_test)
print("pred: ", pre_Y)
print("\n", classification_report(y_test, pre_Y))

0.72
[0.368 0.376 0.464 0.44 ]
true:  [1 0 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1 0 1 0 1 1 0 0 1
 0 1 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 1 0
 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 1 1 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1
 0 1 0 0 0 0 1 1 0 0 1 1 0 0]
pred:  [1 0 1 0 1 0 1 1 1 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 1
 0 1 1 0 1 0 1 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0
 0 0 1 0 0 0 0 1 0 0 0 1 1 1 1 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 1 1 1 0 1 0 0 0]

               precision    recall  f1-score   support

           0       0.73      0.80      0.76        70
           1       0.71      0.62      0.66        55

    accuracy                           0.72       125
   macro avg       0.72      0.71      0.71       125
weighted avg       0.72      0.72      0.72       125



In [30]:
# Gaussian naive Bayes: fit on the training split, then print the hold-out
# accuracy, the 4-fold cross-validation scores on the full set, the true and
# predicted test labels, and a per-class report.
model = GaussianNB()
model.fit(x_train, y_train)
# Predict once and reuse the result for both the accuracy and the report.
pre_Y = model.predict(x_test)
print(accuracy_score(y_test, pre_Y))
cv_results = cross_validate(model, X, Y, cv=4)
print(cv_results['test_score'])
print("true: ", y_test)
print("pred: ", pre_Y)
print("\n", classification_report(y_test, pre_Y))

0.664
[0.608 0.224 0.704 0.576]
true:  [1 0 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1 0 1 0 1 1 0 0 1
 0 1 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 1 0
 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 1 1 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1
 0 1 0 0 0 0 1 1 0 0 1 1 0 0]
pred:  [0 1 1 0 0 1 1 0 0 0 1 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 1
 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 0 0 1 1 0 1 1 0 1 0 0
 0 0 1 0 0 1 0 1 0 1 1 1 0 0 0 1 1 0 0 1 0 0 1 0 1 1 0 1 1 0 1 0 0 1 1 0 1
 1 0 1 0 0 0 1 1 0 0 1 0 0 0]

               precision    recall  f1-score   support

           0       0.70      0.70      0.70        70
           1       0.62      0.62      0.62        55

    accuracy                           0.66       125
   macro avg       0.66      0.66      0.66       125
weighted avg       0.66      0.66      0.66       125



In [31]:
# k-nearest neighbours (k = 5): fit on the training split, then print the
# hold-out accuracy, the 4-fold cross-validation scores on the full set, the
# true and predicted test labels, and a per-class report.
model = neighbors.KNeighborsClassifier(n_neighbors=5)
model.fit(x_train, y_train)
# Predict once and reuse the result for both the accuracy and the report.
pre_Y = model.predict(x_test)
print(accuracy_score(y_test, pre_Y))
cv_results = cross_validate(model, X, Y, cv=4)
print(cv_results['test_score'])
print("true: ", y_test)
print("pred: ", pre_Y)
print("\n", classification_report(y_test, pre_Y))

0.864
[0.384 0.408 0.584 0.536]
true:  [1 0 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1 0 1 0 1 1 0 0 1
 0 1 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 1 0
 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 1 1 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1
 0 1 0 0 0 0 1 1 0 0 1 1 0 0]
pred:  [1 0 1 0 0 0 1 0 1 1 0 0 0 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 1
 0 1 0 0 1 0 1 1 0 1 0 0 0 0 1 1 0 1 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 1 0 0 0
 0 0 1 0 0 0 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 1 0 1 1 0 1 1 0 0 0 0 0 0 1 1
 0 1 0 0 0 0 1 1 1 0 1 1 0 0]

               precision    recall  f1-score   support

           0       0.88      0.87      0.88        70
           1       0.84      0.85      0.85        55

    accuracy                           0.86       125
   macro avg       0.86      0.86      0.86       125
weighted avg       0.86      0.86      0.86       125

