In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [2]:
numFiles = 16
dataset_size = 148128
default_value = [0.0]*250

fileNames = []
for file_id in range(numFiles):
    #fileNames.append('F:\ML_Project\Records\SIGNALS_train_datastrings' + str(file_id) + '.tfrecord')
    fileNames.append('../ml_project_data/records_tf/SIGNALS_train_data_strings_' + str(file_id) + '.tfrecord')
dataset = tf.data.TFRecordDataset(fileNames)

In [3]:
def parser(record):
    features = {
                'size': tf.io.FixedLenFeature([], dtype=tf.int64),
                'eeg_1': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'eeg_2': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'eeg_3': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'eeg_4': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'eeg_5': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'eeg_5': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'eeg_6': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'eeg_7': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'pulse': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'x': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'y': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'z': tf.io.FixedLenFeature([1500],dtype=tf.float32),
                'label': tf.io.FixedLenFeature([], dtype=tf.int64)

                }
    parsed = tf.io.parse_single_example(record, features)

    to_return = tf.stack([
        parsed["eeg_1"],
        parsed["eeg_2"],
        parsed["eeg_3"],
        parsed["eeg_4"],
        parsed["eeg_5"],
        parsed["eeg_6"],
        parsed["eeg_7"],
#         parsed["pulse"],
        parsed["x"],
        parsed["y"],
        parsed["z"]]
    )


    return to_return, parsed["label"]

dataset = dataset.map(parser).batch(500)
# iterator_dataset = dataset.map(parser).__iter__()

In [4]:
def count(counts, batch):
  features, labels = batch
  class_0 = labels == 0
  class_0 = tf.cast(class_0, tf.int32)

  class_1 = labels == 1
  class_1 = tf.cast(class_1, tf.int32)

  class_2 = labels == 2
  class_2 = tf.cast(class_2, tf.int32)

  class_3 = labels == 3
  class_3 = tf.cast(class_3, tf.int32)

  class_4 = labels == 4
  class_4 = tf.cast(class_4, tf.int32)

  counts['class_0'] += tf.reduce_sum(class_0)
  counts['class_1'] += tf.reduce_sum(class_1)
  counts['class_2'] += tf.reduce_sum(class_2)
  counts['class_3'] += tf.reduce_sum(class_3)
  counts['class_4'] += tf.reduce_sum(class_4)

  return counts

On peut voir que les classes ne sont pas équilibrées

In [5]:
echantillon = dataset.take(3)
counts =echantillon.reduce(
    initial_state={'class_0': 0, 'class_1': 0,'class_2': 0,'class_3': 0,'class_4': 0,},
    reduce_func = count)

counts = np.array([counts['class_0'].numpy(),
                   counts['class_1'].numpy(),
                   counts['class_2'].numpy(),
                   counts['class_3'].numpy(),
                   counts['class_4'].numpy(),
                   ]).astype(np.float32)

fractions = counts/counts.sum()
print(fractions)



[0.17       0.04466667 0.33866668 0.318      0.12866667]


In [6]:

@tf.autograph.experimental.do_not_convert
def dataset_per_label(dataset):
    dataset_label0 = (
    dataset
        .unbatch()
        .filter(lambda features, label: label==0)
        .repeat())

    dataset_label1 = (
    dataset
        .unbatch()
        .filter(lambda features, label: label==1)
        .repeat())

    dataset_label2 = (
    dataset
        .unbatch()
        .filter(lambda features, label: label==2)
        .repeat())

    dataset_label3 = (
    dataset
        .unbatch()
        .filter(lambda features, label: label==3)
        .repeat())

    dataset_label4 = (
    dataset
        .unbatch()
        .filter(lambda features, label: label==4)
        .repeat())
    return dataset_label0,dataset_label1,dataset_label2,dataset_label3,dataset_label4

dataset_label0,dataset_label1,dataset_label2,dataset_label3,dataset_label4 = dataset_per_label(dataset)


In [7]:
balanced_ds = tf.data.experimental.sample_from_datasets(
    [dataset_label0,dataset_label1,dataset_label2,dataset_label3,dataset_label4], [1/5, 1/5, 1/5, 1/5, 1/5]).batch(500)


Maintenant les observation piochées sont également distribuées

In [8]:
echantillon = balanced_ds.take(3)
counts =echantillon.reduce(
    initial_state={'class_0': 0, 'class_1': 0,'class_2': 0,'class_3': 0,'class_4': 0,},
    reduce_func = count)

counts = np.array([counts['class_0'].numpy(),
                   counts['class_1'].numpy(),
                   counts['class_2'].numpy(),
                   counts['class_3'].numpy(),
                   counts['class_4'].numpy(),
                   ]).astype(np.float32)

fractions = counts/counts.sum()
print(fractions)


[0.224      0.20133333 0.19733334 0.19133334 0.186     ]


In [9]:
final_dataset = balanced_ds.unbatch()

In [10]:
final_dataset

<_UnbatchDataset shapes: ((10, 1500), ()), types: (tf.float32, tf.int64)>

On peut prendre un échantillon de notre base de donné

In [11]:
train_size = int(0.75 * dataset_size)
test_size = int(0.25 * dataset_size)
train_dataset = final_dataset.take(train_size).shuffle(100).batch(256)
test_dataset = final_dataset.skip(train_size).take(test_size)

In [12]:
train_dataset

<BatchDataset shapes: ((None, 10, 1500), (None,)), types: (tf.float32, tf.int64)>

In [21]:
cnn_model = tf.keras.Sequential([
    keras.layers.Conv1D(128, kernel_size=3,padding ="same",activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling1D(pool_size=(2)),
    keras.layers.Conv1D(128,kernel_size=3,padding ="same" , activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling1D(pool_size=(2)),
    keras.layers.Flatten(),
    keras.layers.Dense(64,activation='tanh'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32,activation='tanh'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(16,activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(5,activation='softmax'),
])
# lstm_model.summary()

In [22]:
optimizer = tf.keras.optimizers.Adam()
cce = tf.keras.losses.SparseCategoricalCrossentropy()
no_epochs = 30

In [23]:
cnn_model.compile(loss=cce,
              optimizer=optimizer,
              metrics=['accuracy'])

In [24]:
history = cnn_model.fit(train_dataset,
            epochs=no_epochs,
            verbose=1)

Epoch 1/30
    142/Unknown - 35s 247ms/step - loss: 1.5806 - accuracy: 0.2559

KeyboardInterrupt: 