In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Activation, Flatten, Dense, Input, Dropout, Convolution1D, MaxPool1D, GlobalMaxPool1D, GlobalAveragePooling1D, \
    concatenate, SpatialDropout1D, TimeDistributed, Bidirectional, LSTM, Reshape, Conv1D, MaxPooling1D,Permute


In [2]:
# Number of TFRecord shards to read and the record count used later for the split sizes.
numFiles = 16
dataset_size = 148128/6

# One path per shard, numbered 0 .. numFiles-1.
fileNames = [
    '../ml_project_data/records_tf/SIGNALS_train_data_strings_' + str(file_id) + '.tfrecord'
    for file_id in range(numFiles)
]

# Raw (still serialized) records from all shards, read in file order.
dataset = tf.data.TFRecordDataset(fileNames)

In [3]:
def parser(record):
    """Decode one serialized example into an ``(eeg_signals, label)`` pair.

    Args:
        record: a scalar string tensor holding one serialized example,
            as yielded by ``tf.data.TFRecordDataset``.

    Returns:
        A tuple ``(signals, label)`` where ``signals`` is a float32 tensor of
        shape ``(7, 1500)`` stacking ``eeg_1`` .. ``eeg_7`` in that order, and
        ``label`` is the int64 scalar stored under ``'label'``.
    """
    eeg_keys = ['eeg_1', 'eeg_2', 'eeg_3', 'eeg_4', 'eeg_5', 'eeg_6', 'eeg_7']
    # Parsed (so their presence is still validated) but not part of the model input.
    unused_signal_keys = ['pulse', 'x', 'y', 'z']

    # Each key is declared exactly once.
    features = {
        'size': tf.io.FixedLenFeature([], dtype=tf.int64),
        'label': tf.io.FixedLenFeature([], dtype=tf.int64),
    }
    for key in eeg_keys + unused_signal_keys:
        features[key] = tf.io.FixedLenFeature([1500], dtype=tf.float32)

    parsed = tf.io.parse_single_example(record, features)

    # Stack the seven EEG channels along a new leading axis -> (7, 1500).
    to_return = tf.stack([parsed[key] for key in eeg_keys])

    return to_return, parsed["label"]

# Decode every serialized record, then group the decoded records into batches of 500.
decoded_records = dataset.map(parser)
dataset = decoded_records.batch(500)
dataset

<BatchDataset shapes: ((None, 7, 1500), (None,)), types: (tf.float32, tf.int64)>

In [4]:
# Peek at the feature array of the very first record.
first_record_iter = dataset.unbatch().take(1).as_numpy_iterator()
next(first_record_iter)[0]

array([[-7.4333719e+04, -2.0358532e+06, -2.3794528e+06, ...,
        -3.3437515e+01, -2.1725368e+01, -3.1227282e+01],
       [-2.2316191e+04, -6.1120056e+05, -7.1444931e+05, ...,
         4.5841770e+01,  5.4701538e+01,  5.2347439e+01],
       [ 2.1174299e+04,  5.7997925e+05,  6.7793569e+05, ...,
        -1.8731419e+01, -3.2894154e+01, -4.2665634e+01],
       ...,
       [-7.6816406e+02, -2.0950875e+04, -2.4512250e+04, ...,
         1.1273933e+01,  1.4340935e+01,  2.5053711e+00],
       [-1.1418926e+03, -3.1221312e+04, -3.6513625e+04, ...,
         2.7110352e+01,  2.1807384e+01,  9.6818047e+00],
       [ 7.3565555e+04,  2.0149024e+06,  2.3549405e+06, ...,
         4.4711449e+01,  3.6066303e+01,  3.3732651e+01]], dtype=float32)

In [5]:
def count(counts, batch):
    """Add the per-class label tallies of one batch to a running total.

    Args:
        counts: mapping with keys ``'class_0'`` .. ``'class_4'`` holding the
            running totals; it is updated in place.
        batch: a ``(features, labels)`` pair; only ``labels`` is read.

    Returns:
        The same ``counts`` mapping after the update.
    """
    _, labels = batch
    class_keys = ('class_0', 'class_1', 'class_2', 'class_3', 'class_4')
    for class_id, key in enumerate(class_keys):
        # 1 where the label equals this class, 0 elsewhere; the sum is the tally.
        is_class = tf.cast(labels == class_id, tf.int32)
        counts[key] += tf.reduce_sum(is_class)
    return counts

On peut voir que les classes ne sont pas équilibrées : la cellule suivante estime la proportion de chaque classe sur un échantillon de trois lots.

In [6]:
# Estimate the class proportions from the first three batches only.
class_keys = ['class_0', 'class_1', 'class_2', 'class_3', 'class_4']

echantillon = dataset.take(3)
counts = echantillon.reduce(
    initial_state={key: 0 for key in class_keys},
    reduce_func=count,
)

# Pull the tallies out of the tensors, in class order, as a float32 vector.
counts = np.array([counts[key].numpy() for key in class_keys]).astype(np.float32)

fractions = counts / counts.sum()
print(fractions)



[0.17       0.04466667 0.33866668 0.318      0.12866667]


In [7]:

@tf.autograph.experimental.do_not_convert
def dataset_per_label(dataset, num_classes=5):
    """Split a batched ``(features, label)`` dataset into one endless stream per class.

    Args:
        dataset: a batched dataset of ``(features, label)`` pairs; it is
            unbatched internally before filtering.
        num_classes: number of integer labels ``0 .. num_classes - 1`` to
            build a stream for. Defaults to 5.

    Returns:
        A tuple of ``num_classes`` datasets. The k-th one yields only the
        records whose label equals ``k`` and repeats indefinitely.
    """
    def _only_label(class_id):
        # `class_id` is a parameter, so each lambda closes over its own value.
        return (
            dataset
            .unbatch()
            .filter(lambda features, label: label == class_id)
            .repeat())

    return tuple(_only_label(class_id) for class_id in range(num_classes))

# One endless, single-class stream per label value 0..4.
(
    dataset_label0,
    dataset_label1,
    dataset_label2,
    dataset_label3,
    dataset_label4,
) = dataset_per_label(dataset)


In [8]:
# Draw each next record from one of the five single-class streams with equal probability.
per_class_streams = [dataset_label0, dataset_label1, dataset_label2, dataset_label3, dataset_label4]
equal_weights = [1/5, 1/5, 1/5, 1/5, 1/5]
balanced_ds = tf.data.experimental.sample_from_datasets(per_class_streams, equal_weights)


Maintenant, les observations piochées sont réparties uniformément entre les cinq classes.

In [9]:
# Alias for the class-balanced stream.
final_dataset = balanced_ds

In [10]:
# Display the dataset object to check its element shapes and dtypes.
final_dataset

<_DirectedInterleaveDataset shapes: ((7, 1500), ()), types: (tf.float32, tf.int64)>

On peut prendre un échantillon de notre base de données.

In [11]:
BATCH_SIZE = 32
train_size = int(0.75 * dataset_size)
test_size = int(0.25 * dataset_size)


def _balanced_stream(records):
    """Return an endless stream that draws every class of `records` with equal probability."""
    # dataset_per_label() unbatches its input, so hand it a batched view of the records.
    per_label = dataset_per_label(records.batch(BATCH_SIZE))
    return tf.data.experimental.sample_from_datasets(
        list(per_label), [1 / len(per_label)] * len(per_label))


# Split the raw records BEFORE oversampling. Taking/skipping on an already
# balanced, endlessly repeating stream lets the rarer classes wrap around, so
# the same records end up in both the training and the evaluation split.
# NOTE(review): assumes `dataset` holds at least `dataset_size` records - confirm.
raw_records = dataset.unbatch()
train_records = raw_records.take(train_size)
test_records = raw_records.skip(train_size).take(test_size)

# Each split is balanced on its own, so both stay class-balanced and keep their
# original sizes, but no held-out record is ever seen during training.
train_dataset = _balanced_stream(train_records).take(train_size).batch(BATCH_SIZE)
test_dataset = _balanced_stream(test_records).take(test_size).batch(BATCH_SIZE)

In [12]:
# 1-D CNN classifier: three stages of paired convolutions, then a dense head
# with a 5-way softmax output.
# NOTE(review): the sample record printed earlier shows raw amplitudes up to
# ~1e6 and the model has no normalisation step - consider scaling the inputs.

# Swap the two non-batch axes so the convolutions slide along the 1500-sample axis.
cnn_layers = [Permute((2, 1))]

# First two stages: two convolutions followed by max pooling.
for n_filters in (16, 32):
    cnn_layers += [
        Conv1D(n_filters, 3, activation='relu'),
        Conv1D(n_filters, 3, activation='relu'),
        MaxPooling1D(3),
    ]

# Last stage: two convolutions collapsed by a global average over time.
cnn_layers += [
    Conv1D(64, 3, activation='relu'),
    Conv1D(64, 3, activation='relu'),
    GlobalAveragePooling1D(),
]

# Classification head.
cnn_layers += [
    Dense(64, activation='relu'),
    Dense(5, activation='softmax'),
]

cnn_model = tf.keras.Sequential(cnn_layers)


In [13]:
# Training configuration: number of passes over the training set, Adam with
# its default settings, and a loss that takes integer class labels.
no_epochs = 30
optimizer = tf.keras.optimizers.Adam()
cce = tf.keras.losses.SparseCategoricalCrossentropy()

In [14]:
# Attach the optimizer, loss and the accuracy metric to the model.
compile_options = dict(
    loss=cce,
    optimizer=optimizer,
    metrics=['accuracy'],
)
cnn_model.compile(**compile_options)

In [15]:
# Train the model and keep the per-epoch metrics in `history`.
# `shuffle` is not passed: Keras ignores it when the input is a
# tf.data.Dataset, so any shuffling has to be done on the dataset itself.
history = cnn_model.fit(
    train_dataset,
    epochs=no_epochs,
    verbose=1,
    validation_data=test_dataset,
)

Epoch 1/30
    273/Unknown - 54s 197ms/step - loss: 11.2440 - accuracy: 0.2655

KeyboardInterrupt: 