Classification of tasks using EEG data <br>
Using TensorFlow <br>
https://www.kaggle.com/wpncrh/classifying-tasks-using-eeg-data-w-tensorflow-nn

In [1]:
import json
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('eeg-data.csv')

In [3]:
df['eeg_power'] = df.eeg_power.map(json.loads)

In [4]:
df.head(2)

Unnamed: 0.1,Unnamed: 0,id,indra_time,browser_latency,reading_time,attention_esense,meditation_esense,eeg_power,raw_values,signal_quality,createdAt,updatedAt,label
0,3730,12,2015-05-09 23:13:42.281,1461,2015-05-09 16:13:40.954,0,0,"[944412.0, 111373.0, 52404.0, 28390.0, 3237.0,...","[-203.0, -202.0, -196.0, -185.0, -163.0, -137....",200,2015-05-09 23:13:39.550,2015-05-09 23:13:39.549+00,unlabeled
1,3732,12,2015-05-09 23:13:43.186,1461,2015-05-09 16:13:41.964,0,0,"[1793049.0, 89551.0, 3896.0, 21727.0, 9301.0, ...","[104.0, 134.0, 128.0, 121.0, 145.0, 151.0, 123...",200,2015-05-09 23:13:40.559,2015-05-09 23:13:40.559+00,unlabeled


In [5]:
df.describe()

Unnamed: 0.1,Unnamed: 0,id,attention_esense,meditation_esense,signal_quality
count,30013.0,30013.0,30013.0,30013.0,30013.0
mean,15010.952521,13.68557,40.28471,46.638723,25.871856
std,8666.911131,8.479506,24.446432,26.153089,64.63664
min,0.0,1.0,0.0,0.0,0.0
25%,7506.0,6.0,24.0,34.0,0.0
50%,15010.0,13.0,43.0,50.0,0.0
75%,22517.0,20.0,57.0,64.0,0.0
max,30021.0,30.0,100.0,100.0,200.0


In [6]:
df = df.drop('Unnamed: 0', 1)
df = df.drop('indra_time', 1)
df = df.drop('browser_latency', 1)
df = df.drop('reading_time', 1)
df = df.drop('attention_esense', 1)
df = df.drop('meditation_esense', 1)
df = df.drop('raw_values', 1)
df = df.drop('signal_quality', 1)
df = df.drop('createdAt', 1)
df = df.drop('updatedAt', 1)

In [7]:
df.head()

Unnamed: 0,id,eeg_power,label
0,12,"[944412.0, 111373.0, 52404.0, 28390.0, 3237.0,...",unlabeled
1,12,"[1793049.0, 89551.0, 3896.0, 21727.0, 9301.0, ...",unlabeled
2,12,"[400192.0, 640624.0, 153087.0, 69733.0, 98854....",unlabeled
3,12,"[681192.0, 138630.0, 67891.0, 26459.0, 592240....",unlabeled
4,12,"[268406.0, 197772.0, 190654.0, 266433.0, 91683...",unlabeled


In [8]:
len(df)

30013

In [9]:
# seperate eeg power into multiple columns
to_series = pd.Series(df['eeg_power'])
eeg_features = pd.DataFrame(to_series.tolist())
df_eeg = pd.concat([df, eeg_features], axis=1)

In [10]:
df_eeg = df_eeg.drop('eeg_power', 1)

In [11]:
df_eeg.head()

Unnamed: 0,id,label,0,1,2,3,4,5,6,7
0,12,unlabeled,944412.0,111373.0,52404.0,28390.0,3237.0,32728.0,4845.0,2036.0
1,12,unlabeled,1793049.0,89551.0,3896.0,21727.0,9301.0,16096.0,3496.0,643.0
2,12,unlabeled,400192.0,640624.0,153087.0,69733.0,98854.0,199537.0,66993.0,51772.0
3,12,unlabeled,681192.0,138630.0,67891.0,26459.0,592240.0,171435.0,164399.0,41765.0
4,12,unlabeled,268406.0,197772.0,190654.0,266433.0,91683.0,200452.0,107585.0,57841.0


In [12]:
df_eeg['label'].value_counts()

unlabeled                       20054
relax                             934
music                             932
video-ver2                        481
thinkOfItems-ver2                 481
colorInstruction2                 469
video-ver1                        457
thinkOfItems-ver1                 449
thinkOfItemsInstruction-ver2      321
colorInstruction1                 316
thinkOfItemsInstruction-ver1      303
mathInstruction                   161
videoInstruction                  161
relaxInstruction                  158
musicInstruction                  152
blinkInstruction                  132
colorRound1-3                     128
colorRound1-4                     127
colorRound1-2                     125
colorRound1-1                     125
colorRound1-5                     124
colorRound1-6                     122
readyRound5                        96
colorRound5-3                      96
readyRound2                        95
readyRound4                        95
colorRound2-

In [13]:
df_eeg.dtypes

id         int64
label     object
0        float64
1        float64
2        float64
3        float64
4        float64
5        float64
6        float64
7        float64
dtype: object

In [14]:
df_eeg_ch = df_eeg.drop('id', 1)

In [15]:
# clean labels
def clean_labels(dd):
    dd.loc[dd.label == 'math1', 'label'] = "math"
    dd.loc[dd.label == 'math2', 'label'] = "math"
    dd.loc[dd.label == 'math3', 'label'] = "math"
    dd.loc[dd.label == 'math4', 'label'] = "math"
    dd.loc[dd.label == 'math5', 'label'] = "math"
    dd.loc[dd.label == 'math6', 'label'] = "math"
    dd.loc[dd.label == 'math7', 'label'] = "math"
    dd.loc[dd.label == 'math8', 'label'] = "math"
    dd.loc[dd.label == 'math9', 'label'] = "math"
    dd.loc[dd.label == 'math10', 'label'] = "math"
    dd.loc[dd.label == 'math11', 'label'] = "math"
    dd.loc[dd.label == 'math12', 'label'] = "math"
    dd.loc[dd.label == 'colorRound1-1', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound1-2', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound1-3', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound1-4', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound1-5', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound1-6', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound2-1', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound2-2', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound2-3', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound2-4', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound2-5', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound2-6', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound3-1', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound3-2', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound3-3', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound3-4', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound3-5', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound3-6', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound4-1', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound4-2', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound4-3', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound4-4', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound4-5', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound4-6', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound5-1', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound5-2', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound5-3', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound5-4', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound5-5', 'label'] = "colors"
    dd.loc[dd.label == 'colorRound5-6', 'label'] = "colors"
    dd.loc[dd.label == 'readyRound1', 'label'] = "ready"
    dd.loc[dd.label == 'readyRound2', 'label'] = "ready"
    dd.loc[dd.label == 'readyRound3', 'label'] = "ready"
    dd.loc[dd.label == 'readyRound4', 'label'] = "ready"
    dd.loc[dd.label == 'readyRound5', 'label'] = "ready"
    dd.loc[dd.label == 'video-ver1', 'label'] = "video"
    dd.loc[dd.label == 'video-ver2', 'label'] = "video"
    dd.loc[dd.label == 'blink1', 'label'] = "blink"
    dd.loc[dd.label == 'blink2', 'label'] = "blink"
    dd.loc[dd.label == 'blink3', 'label'] = "blink"
    dd.loc[dd.label == 'blink4', 'label'] = "blink"
    dd.loc[dd.label == 'blink5', 'label'] = "blink"
    dd.loc[dd.label == 'thinkOfItemsInstruction-ver1', 'label'] = "instruction"
    dd.loc[dd.label == 'thinkOfItemsInstruction-ver2', 'label'] = "instruction"
    dd.loc[dd.label == 'thinkOfItems-ver1', 'label'] = "instruction"
    dd.loc[dd.label == 'thinkOfItems-ver2', 'label'] = "instruction"
    dd.loc[dd.label == 'colorInstruction2', 'label'] = "instruction"
    dd.loc[dd.label == 'colorInstruction1', 'label'] = "instruction"
    dd.loc[dd.label == 'colorInstruction2', 'label'] = "instruction"
    dd.loc[dd.label == 'musicInstruction', 'label'] = "instruction"
    dd.loc[dd.label == 'videoInstruction', 'label'] = "instruction"
    dd.loc[dd.label == 'mathInstruction', 'label'] = "instruction"
    dd.loc[dd.label == 'relaxInstruction', 'label'] = "instruction"
    dd.loc[dd.label == 'blinkInstruction', 'label'] = "instruction"
    return dd

In [16]:
clean_labels_df = clean_labels(df_eeg_ch)

In [17]:
df_eeg_ch['label'].value_counts()

unlabeled      20054
instruction     3103
colors          2405
video            938
math             936
relax            934
music            932
ready            472
blink            239
Name: label, dtype: int64

In [18]:
# remove unlabeled
df_eeg_chlabel = df_eeg_ch[df_eeg_ch['label'] != 'unlabeled']

In [19]:
# pop off labels to new group
label = df_eeg_chlabel.pop('label')
label.head()

13017    instruction
13018    instruction
13019    instruction
13020    instruction
13021    instruction
Name: label, dtype: object

In [20]:
df_eeg_chlabel.head()

Unnamed: 0,0,1,2,3,4,5,6,7
13017,56887.0,45471.0,20074.0,5359.0,22594.0,7867.0,9437.0,3238.0
13018,11626.0,60301.0,5805.0,15729.0,4448.0,33539.0,8864.0,8610.0
13019,15777.0,33461.0,21385.0,44193.0,11741.0,15867.0,4420.0,1050.0
13020,311822.0,44739.0,19000.0,19100.0,2650.0,17622.0,10762.0,6983.0
13021,687393.0,10289.0,2942.0,9874.0,1059.0,2978.0,2065.0,589.0


In [21]:
eeg_chlabel_scarray = StandardScaler().fit_transform(df_eeg_chlabel)
df_eeg_chlabel_sc = pd.DataFrame(eeg_chlabel_scarray)
df_eeg_chlabel_sc.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,-0.464218,-0.191529,-0.114032,-0.519684,0.563813,-0.29685,0.192247,-0.309331
1,-0.566218,-0.06714,-0.48414,-0.191681,-0.488977,1.55837,0.138723,0.404538
2,-0.556863,-0.292264,-0.080027,0.708634,-0.065854,0.28128,-0.276395,-0.600088
3,0.110304,-0.197668,-0.141889,-0.085057,-0.593293,0.408108,0.316016,0.188331
4,0.956691,-0.486622,-0.5584,-0.376875,-0.685599,-0.65016,-0.496377,-0.661349


In [36]:
eeg_values = df_eeg_chlabel_sc.values

In [37]:
eeg_values

array([[-0.4642179 , -0.19152865, -0.11403162, ..., -0.2968499 ,
         0.19224708, -0.30933141],
       [-0.56621814, -0.06714033, -0.48413957, ...,  1.55836978,
         0.13872269,  0.40453836],
       [-0.55686345, -0.29226389, -0.08002703, ...,  0.28128026,
        -0.27639493, -0.6000885 ],
       ...,
       [ 0.7734666 ,  1.10049102, -0.03297567, ...,  1.88262853,
         0.63678911,  0.25610341],
       [-0.56767172, -0.41135962, -0.15602509, ..., -0.28731076,
        -0.30329724,  0.20095525],
       [-0.32355939, -0.2704561 , -0.05427069, ..., -0.20760106,
        -0.00886638,  0.66486429]])

In [38]:
# convert labels to onehots
train_labels = pd.get_dummies(label)

In [39]:
train_labels.shape

(9959, 8)

In [40]:
# convert train_labels to np array
train_labels_values = train_labels.values

In [41]:
x_train, x_test, y_train, y_test = train_test_split(eeg_values,
                                                    train_labels_values,
                                                    test_size=0.2)

In [42]:
x_train, x_test, y_train, y_test = np.array(x_train, dtype='float32'), np.array(x_test, dtype='float32'), np.array(y_train, dtype='float32'), np.array(y_test, dtype='float32')

In [43]:
# build a simple tensorflow model with 1 hidden layer with 1000 nodes in this layer
x = tf.placeholder(tf.float32, [None, x_train.shape[1]])
w1 = tf.Variable(tf.random_normal([x_train.shape[1], 1000], stddev=.5, name='w1'))
b1 = tf.Variable(tf.zeros([1000]))
# calculate hidden output
hidden_output = tf.nn.softmax(tf.matmul(x, w1) + b1)
# bring from 1000 nodes to one of 8 possible labels
w2 = tf.Variable(tf.random_normal([1000, y_train.shape[1]], stddev=.5, name='w2'))
b2 = tf.Variable(tf.zeros([y_train.shape[1]]))
# placeholder for correct values
y_ = tf.placeholder('float', [None, y_train.shape[1]])
# implement model, find predicted y
y = tf.nn.softmax(tf.matmul(hidden_output, w2) + b2)

In [44]:
# use ADAM optimizer to adjust learning rate over time
loss = tf.reduce_mean(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_, name='xentropy')))
opt = tf.train.AdamOptimizer(learning_rate=.0005)
train_step = opt.minimize(loss, var_list=[w1, b1, w2, b2])

In [45]:
# get mini_batch, so that we aren't feeding data in every training epoch
def get_mini_batch(x,y):
    rows = np.random.choice(x.shape[0], 100)
    return x[rows], y[rows]

In [46]:
sess = tf.Session()
# init all vars in graph
## init = tf.initialize_all_variables()
init = tf.global_variables_initializer()
sess.run(init)

In [47]:
ntrials = 10000
for i in range(ntrials):
    # get mini batch
    a,b = get_mini_batch(x_train, y_train)
    # run train step, feeding arrays of 100 rows at a time
    _, cost = sess.run([train_step, loss], feed_dict={x: a, y_: b})
    if i%500 == 0:
        print('epoch is {0} and cost is {1}'.format(i, cost))
        
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

epoch is 0 and cost is 207.87782287597656
epoch is 500 and cost is 202.75961303710938
epoch is 1000 and cost is 200.3954315185547
epoch is 1500 and cost is 202.80287170410156
epoch is 2000 and cost is 195.25672912597656
epoch is 2500 and cost is 193.1629180908203
epoch is 3000 and cost is 188.86376953125
epoch is 3500 and cost is 195.75332641601562
epoch is 4000 and cost is 193.5681610107422
epoch is 4500 and cost is 196.07716369628906
epoch is 5000 and cost is 184.83566284179688
epoch is 5500 and cost is 198.68963623046875
epoch is 6000 and cost is 187.4642791748047
epoch is 6500 and cost is 188.76620483398438
epoch is 7000 and cost is 191.9585418701172
epoch is 7500 and cost is 193.36245727539062
epoch is 8000 and cost is 193.98129272460938
epoch is 8500 and cost is 188.0435028076172
epoch is 9000 and cost is 198.3718719482422
epoch is 9500 and cost is 189.0980224609375


In [48]:
print("test accuracy is {}".format(sess.run(accuracy, feed_dict={x: x_test, y_: y_test})))

test accuracy is 0.3237951695919037


In [49]:
sess.close

<bound method BaseSession.close of <tensorflow.python.client.session.Session object at 0x126cfbc50>>