Classification of tasks using EEG data <br>
Using TensorFlow <br>
https://www.kaggle.com/wpncrh/classifying-tasks-using-eeg-data-w-tensorflow-nn

In [1]:
import json
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

from sklearn.preprocessing import StandardScaler

In [2]:
# Load the raw EEG recordings from a CSV expected in the working directory.
df = pd.read_csv('eeg-data.csv')

In [3]:
# Parse the JSON-encoded eeg_power strings into Python lists. Values that are
# not strings (e.g. already parsed on a previous run of this cell) are passed
# through unchanged, so re-running the cell does not raise.
df['eeg_power'] = df['eeg_power'].map(
    lambda raw: json.loads(raw) if isinstance(raw, str) else raw
)

In [4]:
# Peek at the first two rows after parsing eeg_power.
df.head(2)

Unnamed: 0.1,Unnamed: 0,id,indra_time,browser_latency,reading_time,attention_esense,meditation_esense,eeg_power,raw_values,signal_quality,createdAt,updatedAt,label
0,3730,12,2015-05-09 23:13:42.281,1461,2015-05-09 16:13:40.954,0,0,"[944412.0, 111373.0, 52404.0, 28390.0, 3237.0,...","[-203.0, -202.0, -196.0, -185.0, -163.0, -137....",200,2015-05-09 23:13:39.550,2015-05-09 23:13:39.549+00,unlabeled
1,3732,12,2015-05-09 23:13:43.186,1461,2015-05-09 16:13:41.964,0,0,"[1793049.0, 89551.0, 3896.0, 21727.0, 9301.0, ...","[104.0, 134.0, 128.0, 121.0, 145.0, 151.0, 123...",200,2015-05-09 23:13:40.559,2015-05-09 23:13:40.559+00,unlabeled


In [5]:
# Summary statistics for the numeric columns of the raw frame.
df.describe()

Unnamed: 0.1,Unnamed: 0,id,attention_esense,meditation_esense,signal_quality
count,30013.0,30013.0,30013.0,30013.0,30013.0
mean,15010.952521,13.68557,40.28471,46.638723,25.871856
std,8666.911131,8.479506,24.446432,26.153089,64.63664
min,0.0,1.0,0.0,0.0,0.0
25%,7506.0,6.0,24.0,34.0,0.0
50%,15010.0,13.0,43.0,50.0,0.0
75%,22517.0,20.0,57.0,64.0,0.0
max,30021.0,30.0,100.0,100.0,200.0


In [6]:
# Drop every column that is not used downstream in a single call, leaving
# id, eeg_power and label. `columns=` is used instead of the positional axis
# argument (`df.drop(name, 1)`), which pandas 2.0 no longer accepts.
df = df.drop(columns=[
    'Unnamed: 0',
    'indra_time',
    'browser_latency',
    'reading_time',
    'attention_esense',
    'meditation_esense',
    'raw_values',
    'signal_quality',
    'createdAt',
    'updatedAt',
])

In [7]:
# Confirm which columns remain after the drop.
df.head()

Unnamed: 0,id,eeg_power,label
0,12,"[944412.0, 111373.0, 52404.0, 28390.0, 3237.0,...",unlabeled
1,12,"[1793049.0, 89551.0, 3896.0, 21727.0, 9301.0, ...",unlabeled
2,12,"[400192.0, 640624.0, 153087.0, 69733.0, 98854....",unlabeled
3,12,"[681192.0, 138630.0, 67891.0, 26459.0, 592240....",unlabeled
4,12,"[268406.0, 197772.0, 190654.0, 266433.0, 91683...",unlabeled


In [8]:
# Number of rows in the trimmed frame.
len(df)

30013

In [9]:
# Separate eeg_power into multiple columns (one per list element).
to_series = df['eeg_power']
# Reuse df's own index for the new frame: pd.concat(axis=1) aligns rows by
# index label, so a fresh 0..n-1 index would misalign (and introduce NaNs)
# if df ever carries a non-default index.
eeg_features = pd.DataFrame(to_series.tolist(), index=to_series.index)
df_eeg = pd.concat([df, eeg_features], axis=1)

In [10]:
# The list column is redundant once its values live in their own columns.
# `columns=` replaces the positional axis argument removed in pandas 2.0.
df_eeg = df_eeg.drop(columns='eeg_power')

In [11]:
# Check the expanded frame: the eeg_power values now sit in integer-named columns.
df_eeg.head()

Unnamed: 0,id,label,0,1,2,3,4,5,6,7
0,12,unlabeled,944412.0,111373.0,52404.0,28390.0,3237.0,32728.0,4845.0,2036.0
1,12,unlabeled,1793049.0,89551.0,3896.0,21727.0,9301.0,16096.0,3496.0,643.0
2,12,unlabeled,400192.0,640624.0,153087.0,69733.0,98854.0,199537.0,66993.0,51772.0
3,12,unlabeled,681192.0,138630.0,67891.0,26459.0,592240.0,171435.0,164399.0,41765.0
4,12,unlabeled,268406.0,197772.0,190654.0,266433.0,91683.0,200452.0,107585.0,57841.0


In [12]:
# Row count per raw label, most frequent first.
df_eeg['label'].value_counts()

unlabeled                       20054
relax                             934
music                             932
video-ver2                        481
thinkOfItems-ver2                 481
colorInstruction2                 469
video-ver1                        457
thinkOfItems-ver1                 449
thinkOfItemsInstruction-ver2      321
colorInstruction1                 316
thinkOfItemsInstruction-ver1      303
videoInstruction                  161
mathInstruction                   161
relaxInstruction                  158
musicInstruction                  152
blinkInstruction                  132
colorRound1-3                     128
colorRound1-4                     127
colorRound1-2                     125
colorRound1-1                     125
colorRound1-5                     124
colorRound1-6                     122
readyRound5                        96
colorRound5-3                      96
readyRound2                        95
readyRound4                        95
colorRound2-

In [13]:
# Inspect the dtype of each column.
df_eeg.dtypes

id         int64
label     object
0        float64
1        float64
2        float64
3        float64
4        float64
5        float64
6        float64
7        float64
dtype: object

In [14]:
# df_eeg['label'] = df_eeg["label"].astype('category')
# df_eeg.dtypes

In [15]:
# df_eeg['label'].value_counts()

In [16]:
# clean labels
def clean_labels(dd):
    """Collapse the fine-grained task labels in ``dd['label']`` into coarse groups.

    Mapping applied (labels not listed are left untouched):

    * ``math1`` .. ``math12``                     -> ``"math"``
    * ``colorRound1-1`` .. ``colorRound5-6``      -> ``"colors"``
    * ``readyRound1`` .. ``readyRound5``          -> ``"ready"``
    * ``video-ver1``, ``video-ver2``              -> ``"video"``
    * ``blink1`` .. ``blink5``                    -> ``"blink"``
    * every ``*Instruction*`` label, plus ``thinkOfItems-ver1`` and
      ``thinkOfItems-ver2``                       -> ``"instruction"``

    Parameters
    ----------
    dd : pandas.DataFrame
        Frame with a ``label`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with ``label`` rewritten.
    """
    label_groups = {}
    label_groups.update(('math{}'.format(i), 'math') for i in range(1, 13))
    label_groups.update(
        ('colorRound{}-{}'.format(rnd, trial), 'colors')
        for rnd in range(1, 6)
        for trial in range(1, 7)
    )
    label_groups.update(('readyRound{}'.format(i), 'ready') for i in range(1, 6))
    label_groups.update(('video-ver{}'.format(i), 'video') for i in (1, 2))
    label_groups.update(('blink{}'.format(i), 'blink') for i in range(1, 6))

    # NOTE(review): the two thinkOfItems-ver* labels are grouped with the
    # instruction screens, exactly as before; confirm that this is intended
    # and not a leftover from copy-pasting the *Instruction-ver* lines.
    instruction_labels = (
        'thinkOfItemsInstruction-ver1',
        'thinkOfItemsInstruction-ver2',
        'thinkOfItems-ver1',
        'thinkOfItems-ver2',
        'colorInstruction1',
        'colorInstruction2',
        'musicInstruction',
        'videoInstruction',
        'mathInstruction',
        'relaxInstruction',
        'blinkInstruction',
    )
    label_groups.update((name, 'instruction') for name in instruction_labels)

    # None of the target names is itself a key, so a single exact-match
    # replace gives the same result as the sequential per-label assignments.
    dd['label'] = dd['label'].replace(label_groups)
    return dd

In [17]:
# Collapse the raw labels into coarse task names. clean_labels modifies and
# returns its argument, so clean_labels_df and df_eeg are the same frame.
clean_labels_df = clean_labels(df_eeg)

In [18]:
# Row count per label after the merge.
clean_labels_df.label.value_counts()

unlabeled      20054
instruction     3103
colors          2405
video            938
math             936
relax            934
music            932
ready            472
blink            239
Name: label, dtype: int64

In [19]:
# drop unlabeled and other unwanted labels. 
def drop_useless_labels(df, labels_to_drop=('unlabeled', 'instruction', 'blink',
                                            'ready', 'colors', 'music', 'video')):
    """Return the rows of ``df`` whose label is not in ``labels_to_drop``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``label`` column. It is not modified.
    labels_to_drop : iterable of str, optional
        Labels to discard. Defaults to the seven labels this function has
        always removed.

    Returns
    -------
    pandas.DataFrame
        The surviving rows with their original index labels. An explicit copy
        is returned so that adding columns to the result later does not
        trigger pandas' SettingWithCopyWarning.
    """
    keep_mask = ~df['label'].isin(list(labels_to_drop))
    return df[keep_mask].copy()

In [20]:
# Keep only the rows whose label survives the filter.
df_interest = drop_useless_labels(clean_labels_df)

In [21]:
# Row count per remaining label.
df_interest['label'].value_counts()

math     936
relax    934
Name: label, dtype: int64

In [22]:
# Preview the filtered rows; the index still carries the pre-filter row labels.
df_interest.head()

Unnamed: 0,id,label,0,1,2,3,4,5,6,7
13274,7,relax,5044.0,10156.0,3281.0,10403.0,12393.0,10266.0,1949.0,2937.0
13275,11,relax,548188.0,67192.0,20298.0,4142.0,30576.0,18237.0,5603.0,4783.0
13276,5,relax,449571.0,83093.0,15379.0,34656.0,6750.0,10348.0,5315.0,2585.0
13277,1,relax,85497.0,20547.0,2723.0,3270.0,2522.0,2209.0,449.0,393.0
13278,13,relax,72768.0,44080.0,25974.0,16079.0,12995.0,27132.0,33264.0,9121.0


In [23]:
# (rows, columns) of the filtered frame.
df_interest.shape

(1870, 10)

In [24]:
# NOTE(review): rename returns a new frame and the result is not assigned, so
# this cell only displays a renamed copy; df_interest itself keeps column 0.
df_interest.rename(columns = {0: '1'})

Unnamed: 0,id,label,1,1.1,2,3,4,5,6,7
13274,7,relax,5044.0,10156.0,3281.0,10403.0,12393.0,10266.0,1949.0,2937.0
13275,11,relax,548188.0,67192.0,20298.0,4142.0,30576.0,18237.0,5603.0,4783.0
13276,5,relax,449571.0,83093.0,15379.0,34656.0,6750.0,10348.0,5315.0,2585.0
13277,1,relax,85497.0,20547.0,2723.0,3270.0,2522.0,2209.0,449.0,393.0
13278,13,relax,72768.0,44080.0,25974.0,16079.0,12995.0,27132.0,33264.0,9121.0
13279,14,relax,10171.0,13086.0,13814.0,9290.0,9794.0,8282.0,9724.0,6925.0
13280,4,relax,486066.0,154967.0,11921.0,16636.0,13902.0,9087.0,10355.0,4436.0
13281,6,relax,769537.0,132633.0,8882.0,104962.0,31441.0,11220.0,11482.0,5555.0
13282,2,relax,67919.0,13799.0,27658.0,18156.0,19295.0,6873.0,6570.0,4252.0
13283,12,relax,1413027.0,38537.0,45687.0,17712.0,55097.0,31452.0,13794.0,9813.0


In [25]:
# Disabled experiment: the rename below is wrapped in a string literal, so it is
# never executed; the cell merely echoes the string.
'''df_interest.rename(columns = {0: '1',
                              1: '2',
                              2: '3',
                              3: '4',
                              4: '5',
                              5: '6',
                              6: '7',
                              7: '8'
                             })
df_interest.head()'''

"df_interest.rename(columns = {0: '1',\n                              1: '2',\n                              2: '3',\n                              3: '4',\n                              4: '5',\n                              5: '6',\n                              6: '7',\n                              7: '8'\n                             })\ndf_interest.head()"

In [26]:
# binarize label: math = 1, relax = 0
# df_interest is the result of filtering another frame, so adding a column with
# df_interest[...] = ... can raise SettingWithCopyWarning; assign() builds a
# new frame instead.
df_interest = df_interest.assign(
    binary_label=(df_interest['label'] == 'math').astype(int)
)
df_interest.head(3)

Unnamed: 0,id,label,0,1,2,3,4,5,6,7,binary_label
13274,7,relax,5044.0,10156.0,3281.0,10403.0,12393.0,10266.0,1949.0,2937.0,0
13275,11,relax,548188.0,67192.0,20298.0,4142.0,30576.0,18237.0,5603.0,4783.0,0
13276,5,relax,449571.0,83093.0,15379.0,34656.0,6750.0,10348.0,5315.0,2585.0,0


In [27]:
# reset index to 0..n-1; drop=True discards the old index instead of first
# materialising it as an 'index' column and then dropping that column (the
# positional-axis drop('index', 1) no longer works in pandas 2.0).
df_interest = df_interest.reset_index(drop=True)

In [28]:
# Confirm the index now starts at 0.
df_interest.head(3)

Unnamed: 0,id,label,0,1,2,3,4,5,6,7,binary_label
0,7,relax,5044.0,10156.0,3281.0,10403.0,12393.0,10266.0,1949.0,2937.0,0
1,11,relax,548188.0,67192.0,20298.0,4142.0,30576.0,18237.0,5603.0,4783.0,0
2,5,relax,449571.0,83093.0,15379.0,34656.0,6750.0,10348.0,5315.0,2585.0,0


In [29]:
# store target label in y
# Double brackets keep y as a one-column DataFrame; .copy() makes it
# independent of df_interest.
y = df_interest[['binary_label']].copy()

In [30]:
# Preview the target column.
y.head()

Unnamed: 0,binary_label
0,0
1,0
2,0
3,0
4,0


In [31]:
# Keep only the EEG power columns as model features: drop the subject id and
# both label columns in one call. `columns=` replaces the positional axis
# argument removed in pandas 2.0.
eeg_signal_df = df_interest.drop(columns=['id', 'label', 'binary_label'])

In [32]:
# Preview the unscaled feature columns.
eeg_signal_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,5044.0,10156.0,3281.0,10403.0,12393.0,10266.0,1949.0,2937.0
1,548188.0,67192.0,20298.0,4142.0,30576.0,18237.0,5603.0,4783.0
2,449571.0,83093.0,15379.0,34656.0,6750.0,10348.0,5315.0,2585.0
3,85497.0,20547.0,2723.0,3270.0,2522.0,2209.0,449.0,393.0
4,72768.0,44080.0,25974.0,16079.0,12995.0,27132.0,33264.0,9121.0


In [33]:
# Standardise each feature column.
# NOTE(review): the scaler is fitted on every row of eeg_signal_df; no
# train/test separation exists at this point.
x_array = StandardScaler().fit_transform(eeg_signal_df)

In [34]:
# Wrap the scaled values in a DataFrame (default integer row/column labels)
# and preview them.
x = pd.DataFrame(x_array)
x.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,-0.550709,-0.477853,-0.552418,-0.379412,0.006291,-0.076726,-0.443636,-0.288593
1,0.661348,0.051942,-0.154057,-0.573634,1.16513,0.577499,-0.101136,-0.003616
2,0.441279,0.199643,-0.269209,0.372939,-0.353349,-0.069995,-0.128131,-0.342933
3,-0.371174,-0.381333,-0.565481,-0.600685,-0.622808,-0.738009,-0.584235,-0.681324
4,-0.399579,-0.16274,-0.021184,-0.203337,0.044658,1.307562,2.491613,0.666064


In [35]:
# split eeg_signal_df and y into train and test data
raw_train, raw_test, y_train, y_test = train_test_split(eeg_signal_df, y,
                                                        test_size=0.33,
                                                        random_state=42)

# Fit the scaler on the training rows only and apply the same transform to the
# test rows, so no statistics from the held-out data leak into preprocessing.
scaler = StandardScaler().fit(raw_train)
x_train = pd.DataFrame(scaler.transform(raw_train), index=raw_train.index)
x_test = pd.DataFrame(scaler.transform(raw_test), index=raw_test.index)

In [36]:
# Check class balance of the training target: the mean of the 0/1
# binary_label column is the share of rows labelled math.
y_train.describe()

Unnamed: 0,binary_label
count,1252.0
mean,0.497604
std,0.500194
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [37]:
# define classifier: a decision tree limited to 50 leaf nodes, with a fixed
# random_state so repeated fits give the same tree
classifier = DecisionTreeClassifier(max_leaf_nodes=50, random_state=42)

In [38]:
# fit classifier on the training split (y_train is a one-column DataFrame)
classifier.fit(x_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=50,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best')

In [39]:
# predict a binary label for every row of the held-out test split
preds = classifier.predict(x_test)

In [40]:
# measure accuracy: fraction of test rows whose prediction matches y_test
accuracy_score(y_true=y_test, y_pred=preds)

0.5550161812297735