In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
import functools

import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
tf.enable_eager_execution()

In [2]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
cd /content/gdrive/My Drive/eeg

/content/gdrive/My Drive/eeg


In [4]:
LABEL_COLUMN = 'group'
LABELS = [0, 1]

CSV_COLUMNS = ['condition','group', 'F1_P300', 'FC3_P300', 'FC1_P300', 'AFz_P300', 'Fz_P300', 'F2_P300', 'FCz_P300']

def get_dataset(file_path, **kwargs):
  dataset = tf.data.experimental.make_csv_dataset(
      file_path,
      batch_size=5, # Artificially small to make examples easier to show.
      label_name=LABEL_COLUMN,
      na_value="?",
      num_epochs=1,
      ignore_errors=True, 
      **kwargs)
  return dataset

temp_dataset = get_dataset('P300.csv', select_columns=CSV_COLUMNS)

Instructions for updating:
Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, num_parallel_calls=tf.data.experimental.AUTOTUNE)` instead. If sloppy execution is desired, use `tf.data.Options.experimental_determinstic`.


In [0]:
def pack(features, label):
  return tf.stack(list(features.values()), axis=-1), label

In [0]:
class PackNumericFeatures(object):
  def __init__(self, names):
    self.names = names

  def __call__(self, features, labels):
    numeric_features = [features.pop(name) for name in self.names]
    numeric_features = [tf.cast(feat, tf.float32) for feat in numeric_features]
    numeric_features = tf.stack(numeric_features, axis=-1)
    features['numeric'] = numeric_features

    return features, labels

In [0]:
NUMERIC_FEATURES = ['F1_P300', 'FC3_P300', 'FC1_P300', 'AFz_P300', 'Fz_P300', 'F2_P300', 'FCz_P300']

packed_data = temp_dataset.map(
    PackNumericFeatures(NUMERIC_FEATURES))

In [8]:
import pandas as pd
desc = pd.read_csv('P300.csv')[NUMERIC_FEATURES].describe()
desc

Unnamed: 0,F1_P300,FC3_P300,FC1_P300,AFz_P300,Fz_P300,F2_P300,FCz_P300
count,23201.0,23201.0,23201.0,23201.0,23201.0,23201.0,23201.0
mean,19.722599,18.564421,19.582112,20.168327,19.900291,19.858939,20.04432
std,18.002531,15.52908,15.46232,23.876658,18.482256,18.600639,15.257639
min,-119.649,-87.2075,-77.728,-142.7501,-100.2636,-104.256,-78.3948
25%,9.9157,9.8355,10.5514,9.2242,9.9598,9.9951,10.8261
50%,17.04,16.424,17.6471,16.134,17.2932,17.0181,18.1997
75%,25.7391,24.3726,25.9477,24.8952,26.0631,25.6702,26.7563
max,298.6268,314.0851,392.577,418.2767,302.8597,278.6592,236.622


In [0]:
MEAN = np.array(desc.T['mean'])
STD = np.array(desc.T['std'])

In [0]:
def normalize_numeric_data(data, mean, std):
  # Center the data
  return (data-mean)/std


In [11]:
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)

numeric_column = tf.feature_column.numeric_column('numeric', normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)])
numeric_columns = [numeric_column]
numeric_column

NumericColumn(key='numeric', shape=(7,), default_value=None, dtype=tf.float32, normalizer_fn=functools.partial(<function normalize_numeric_data at 0x7f4d94ec8b70>, mean=array([19.72259939, 18.56442063, 19.58211167, 20.16832704, 19.90029093,
       19.85893852, 20.04432009]), std=array([18.00253096, 15.52908046, 15.4623197 , 23.87665776, 18.48225643,
       18.60063883, 15.25763941])))

In [12]:
example_batch, labels_batch = next(iter(packed_data)) 
example_batch['numeric']

<tf.Tensor: id=96, shape=(5, 7), dtype=float32, numpy=
array([[24.0825, 18.3535, 30.2719, 47.655 , 19.7046, 29.1978, 17.8254],
       [35.929 , 33.9416, 32.2931, 24.8124, 31.2819, 24.8625, 41.3216],
       [ 1.5433, 14.4786,  8.0098,  8.4749, 14.2307, 10.4456, 12.1196],
       [28.1762, 28.0287, 27.9261, 26.0855, 31.5575, 31.9241, 36.2192],
       [ 9.2698,  6.5928,  2.2152,  1.5559,  5.4011,  8.1458,  3.6442]],
      dtype=float32)>

In [13]:
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
numeric_layer(example_batch).numpy()

array([[ 0.24218269, -0.01358228,  0.69134444,  1.1511943 , -0.0105881 ,
         0.5020721 , -0.14543007],
       [ 0.9002291 ,  0.99021834,  0.82206213,  0.19450267,  0.6158128 ,
         0.2689994 ,  1.394533  ],
       [-1.009819  , -0.26310775, -0.7484202 , -0.48974305, -0.30675864,
        -0.5060761 , -0.5193935 ],
       [ 0.4695785 ,  0.6094552 ,  0.5396337 ,  0.24782252,  0.6307244 ,
         0.64864236,  1.0601169 ],
       [-0.5806294 , -0.7709163 , -1.1231763 , -0.77952397, -0.7844925 ,
        -0.629717  , -1.0748792 ]], dtype=float32)

In [0]:
CATEGORIES = {'condition': [1, 2, 3]}

In [0]:
categorical_columns = []
for feature, vocab in CATEGORIES.items():
  cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
  categorical_columns.append(tf.feature_column.indicator_column(cat_col))

In [16]:
categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)
print(categorical_layer(example_batch).numpy()[0])

Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
[0. 0. 1.]


In [0]:
preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns+numeric_columns)

In [0]:
model = tf.keras.Sequential([
  preprocessing_layer,
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

In [0]:
train_data = packed_data.shuffle(8000)
test_data = packed_data.shuffle(200)

In [22]:
model.fit(train_data, epochs=20)

Epoch 1/20
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f4d94e08080>

In [23]:
test_loss, test_accuracy = model.evaluate(test_data)

print('\n\nTest Loss {}, Test Accuracy {}'.format(test_loss, test_accuracy))



Test Loss 0.626742870241654, Test Accuracy 0.6403172016143799
