In [1]:
import tensorflow as tf 
import pandas as pd 
import io
import itertools
import numpy as np 
import json
from tensorflow import feature_column
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorboard.plugins.hparams import api as hp
import matplotlib.pyplot as plt
import sklearn.metrics
import datetime


## Load Data and Create Dataset

ear_l     float64
ear_r     float64
ear       float64
Target      int64
dtype: object
               ear_l          ear_r            ear         Target
count  116069.000000  116069.000000  116069.000000  116069.000000
mean        0.305900       0.304856       0.305378       0.732986
std         0.033780       0.031586       0.031354       0.442401
min         0.112955       0.093567       0.105054       0.000000
25%         0.286097       0.290172       0.289110       0.000000
50%         0.311300       0.310224       0.312204       1.000000
75%         0.329276       0.325553       0.326692       1.000000
max         0.440620       0.500000       0.427168       1.000000
          ear_l     ear_r       ear  Target
frame                                      
0.0    0.282051  0.289474  0.285762       0
1.0    0.294775  0.310811  0.302793       0
2.0    0.294872  0.281959  0.288415       0
3.0    0.269142  0.289374  0.279258       0
4.0    0.277569  0.276817  0.277193       0


In [60]:
df = pd.read_csv("data/stage_data_out/dataset/Merge_Dataset/Merge_Dataset.csv", index_col=0)
print(df.dtypes)
print(df.describe())
print(df.head(5))

target = None
target = df.pop('Target')
target = pd.DataFrame(target)
target = target.rename(columns = {"Target"  : 'Fatigue'})

target['Awake'] = np.where(target['Fatigue'] == 0, 1, 0)
print(dict(target))

dataset = tf.data.Dataset.from_tensor_slices((dict(df), dict(target)))
print(dataset)


ear_l     float64
ear_r     float64
ear       float64
Target      int64
dtype: object
               ear_l          ear_r            ear         Target
count  116069.000000  116069.000000  116069.000000  116069.000000
mean        0.305900       0.304856       0.305378       0.732986
std         0.033780       0.031586       0.031354       0.442401
min         0.112955       0.093567       0.105054       0.000000
25%         0.286097       0.290172       0.289110       0.000000
50%         0.311300       0.310224       0.312204       1.000000
75%         0.329276       0.325553       0.326692       1.000000
max         0.440620       0.500000       0.427168       1.000000
          ear_l     ear_r       ear  Target
frame                                      
0.0    0.282051  0.289474  0.285762       0
1.0    0.294775  0.310811  0.302793       0
2.0    0.294872  0.281959  0.288415       0
3.0    0.269142  0.289374  0.279258       0
4.0    0.277569  0.276817  0.277193       0
{'Fatigue': 

In [39]:
for feature_batch, label_batch in dataset.take(1):
  print('Every Feature:', list(feature_batch.keys()))
  print('Every Target:', list(label_batch.keys()))
  print('A batch of ear:', feature_batch['ear'])
  print('A batch of targets:', list(label_batch.values()) )

Every Feature: ['ear_l', 'ear_r', 'ear']
Every Target: ['Fatigue', 'Awake']
A batch of ear: tf.Tensor(0.2857624831309042, shape=(), dtype=float64)
A batch of targets: [<tf.Tensor: shape=(), dtype=int64, numpy=0>, <tf.Tensor: shape=(), dtype=int64, numpy=1>]


## Splitting, Shuffing, Batching  data

### Splitting and Shuffling

In [41]:
dataset_size = dataset.reduce(0, lambda x, _: x + 1).numpy()
dataset = dataset.shuffle(buffer_size = dataset_size)

train_size = int(0.7*dataset_size)
val_size = int(0.15*dataset_size)
test_size = int(0.15*dataset_size)

train = dataset.take(train_size)
val = dataset.skip(train_size)
val = dataset.take(val_size)
test = dataset.skip(train_size + val_size)
test = dataset.take(test_size)

train_size = train.reduce(0, lambda x, _: x + 1).numpy()
val_size = val.reduce(0, lambda x, _: x + 1).numpy()
test_size = test.reduce(0, lambda x, _: x + 1).numpy()

print("Full dataset size:", dataset_size)
print("Train dataset size:", train_size)
print("Val dataset size:", val_size)
print("Test dataset size:", test_size)

Full dataset size: 116069
Train dataset size: 81248
Val dataset size: 17410
Test dataset size: 17410


### Shuffling, Batching

In [42]:
BATCH_SIZE = 32

train = train.shuffle(buffer_size = train_size)
train = train.batch(BATCH_SIZE)

val = val.shuffle(buffer_size = val_size)
val = val.batch(BATCH_SIZE)

test = test.batch(BATCH_SIZE)

## Feature Engineering

In [44]:
example_batch = next(iter(train))[0]


In [43]:
def demo(feature_column):
  feature_layer = layers.DenseFeatures(feature_column)
  print(feature_layer(example_batch).numpy())

# POnly if we have features with different scale
def normalize_numerical_features(df, features):
  def get_mean_std(x):
    return df[x].mean(), df[x].std()
  for column in features: 
    mean, std = get_mean_std(column)
    def z_score(col):
      return (col - mean)/std    
    def _numeric_column_normalized(column_name, normalizer_fn):
      return tf.feature_column.numeric_column(column_name, normalizer_fn=normalizer_fn)
    return _numeric_column_normalized(column,z_score)
  
def get_normalization_layer(name, dataset):
  # Create a Normalization layer for our feature.
  normalizer = preprocessing.Normalization()
  # Prepare a Dataset that only yields our feature.
  feature_ds = dataset.map(lambda x, y: x[name])
  # Learn the statistics of the data.
  normalizer.adapt(feature_ds)
  return normalizer

def make_numerical_feature_col(numerical_column, normalize = False):
    for column_name in numerical_column:
        numeric_col = tf.keras.Input(shape=(1,), name=column_name)
        if normalize : 
            normalization_layer = get_normalization_layer(column_name, train)
            encoded_numeric_col = normalization_layer(numeric_col) 
        else : 
            encoded_numeric_col = feature_column.numeric_column(column_name)
        all_inputs.append(numeric_col)
        encoded_features.append(encoded_numeric_col)
    return all_inputs, encoded_features


In [46]:
all_inputs = []
encoded_features = []
numerical_features = ["ear","ear_l","ear_r"]
all_inputs, encoded_features = make_numerical_feature_col(numerical_features, normalize = True)

In [47]:
all_features = []
all_features = tf.keras.layers.concatenate(encoded_features)

In [20]:
from sklearn.model_selection import train_test_split
import random

df = pd.read_csv("data/stage_data_out/dataset/Merge_Dataset/Merge_Dataset.csv")
df.pop("frame")
print(df)
index_list = list(df[df["Target"] == 1].index)
index_to_remove = random.sample(index_list, len(index_list) -len(df[df["Target"] == 0]) )
print(len(df[df["Target"] == 0]))
print(len(df[df["Target"] == 1]))

df.drop(index_to_remove, inplace=True)
print(len(df[df["Target"] == 0]))
print(len(df[df["Target"] == 1]))

df_train, df_test = train_test_split(df, test_size=0.33)

FEATURES = ["ear", "ear_l", "ear_r"]
LABEL = 'Target'
continuous_features = [tf.feature_column.numeric_column(k) for k in FEATURES]		
model = tf.estimator.LinearClassifier(continuous_features, n_classes = 2)

def get_input_fn(data_set, num_epochs=None, n_batch = 128, shuffle=True):
    return tf.compat.v1.estimator.inputs.pandas_input_fn( x=pd.DataFrame({k: data_set[k].values for k in FEATURES}), y = pd.Series(data_set[LABEL].values), batch_size=n_batch, num_epochs=num_epochs, shuffle=shuffle)

model.train(input_fn=get_input_fn(df_train, num_epochs=None, n_batch = 128, shuffle=False),steps=1000)


           ear_l     ear_r       ear  Target
0       0.282051  0.289474  0.285762       0
1       0.294775  0.310811  0.302793       0
2       0.294872  0.281959  0.288415       0
3       0.269142  0.289374  0.279258       0
4       0.277569  0.276817  0.277193       0
...          ...       ...       ...     ...
116064  0.225338  0.235180  0.230259       1
116065  0.258415  0.250970  0.254693       1
116066  0.249878  0.270459  0.260168       1
116067  0.258046  0.250010  0.254028       1
116068  0.279743  0.277760  0.278752       1

[116069 rows x 4 columns]
30992
85077
30992
30992
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmps4g0kech', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 100

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x7f9a45034cd0>

In [21]:
model.evaluate(input_fn=get_input_fn(df_test, 
                                      num_epochs=1,
                                      n_batch = 128,
                                      shuffle=False),
                                      steps=1000)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-04-14T17:23:33Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmps4g0kech/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [100/1000]
INFO:tensorflow:Inference Time : 4.39245s
INFO:tensorflow:Finished evaluation at 2021-04-14-17:23:37
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.4986556, accuracy_baseline = 0.50134444, auc = 0.5212795, auc_precision_recall = 0.5248309, average_loss = 0.69346386, global_step = 1000, label/mean = 0.50134444, loss 

{'accuracy': 0.4986556,
 'accuracy_baseline': 0.50134444,
 'auc': 0.5212795,
 'auc_precision_recall': 0.5248309,
 'average_loss': 0.69346386,
 'label/mean': 0.50134444,
 'loss': 0.6934649,
 'precision': 0.0,
 'prediction/mean': 0.48490652,
 'recall': 0.0,
 'global_step': 1000}

In [22]:
predicitons = model.predict(input_fn=get_input_fn(df_test, num_epochs=1,n_batch = 128, shuffle=False))
predicitons = np.array([np.round(item["logistic"]) for item in predicitons])
print(len(predicitons))
print(len(list(df_test["Target"])))
from sklearn.metrics import confusion_matrix
confusion_matrix(list(df_test["Target"]), predicitons)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmps4g0kech/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
20455
20455


array([[10200,     0],
       [10255,     0]])

In [29]:
6123],
       [    0, 17091]])

Unnamed: 0_level_0,ear_l,ear_r,ear,Target
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,0.282051,0.289474,0.285762,0
1.0,0.294775,0.310811,0.302793,0
2.0,0.294872,0.281959,0.288415,0
3.0,0.269142,0.289374,0.279258,0
4.0,0.277569,0.276817,0.277193,0
...,...,...,...,...
6454.0,0.250850,0.240810,0.245830,0
6458.0,0.257576,0.283741,0.270658,0
6459.0,0.242424,0.267111,0.254768,0
6461.0,0.227273,0.226482,0.226877,0


## Model

## Hyper Parameter tuning (Hparams & tensor board)

### Define log dir

In [48]:
logdir = "tensorboard/logs/fit/tunning/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")+"/"

### Define model parameter

In [53]:
HP_NUM_UNITS_1 = hp.HParam('num_units_1', hp.Discrete([32]))
HP_NUM_UNITS_2 = hp.HParam('num_units_2', hp.Discrete([512]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.5,0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam']))
HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['relu']))
HP_ACTIVATION_OUTPUT = hp.HParam('activation_output', hp.Discrete(['sigmoid']))


METRIC_BINARY_ACCURACY = "binary_accuracy"
METRIC_BINARY_CROSSENTROPY = "binary_crossentropy"
METRIC_MSE = "mean_squared_error"

NUMBER_OF_TARGET = 2
metrics = ["binary_accuracy","binary_crossentropy","mean_squared_error"]


### Initialize hyper parameter for the log

In [51]:
with tf.summary.create_file_writer(logdir).as_default():
  hp.hparams_config(
    hparams=[HP_NUM_UNITS_1, HP_NUM_UNITS_2, HP_DROPOUT, HP_ACTIVATION, HP_ACTIVATION_OUTPUT, HP_OPTIMIZER],
    metrics=[ hp.Metric(METRIC_BINARY_ACCURACY, display_name='Binary Accuracy'),
              hp.Metric(METRIC_BINARY_CROSSENTROPY, display_name='Binary Cross Entropy'),
              hp.Metric(METRIC_MSE, display_name='MSE'),
    ],
  )

### Define the model

In [52]:
def modeling(hparams):
    
    x = tf.keras.layers.BatchNormalization()(all_features)
    x = tf.keras.layers.Dense(hparams[HP_NUM_UNITS_1],activation=hparams[HP_ACTIVATION])(x)
    x = tf.keras.layers.Dropout(hparams[HP_DROPOUT])(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(hparams[HP_NUM_UNITS_2],activation=hparams[HP_ACTIVATION])(x)
    x = tf.keras.layers.Dropout(hparams[HP_DROPOUT])(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(hparams[HP_NUM_UNITS_2],activation=hparams[HP_ACTIVATION])(x)
    x = tf.keras.layers.Dropout(hparams[HP_DROPOUT])(x)
    x = tf.keras.layers.BatchNormalization()(x)

    output = tf.keras.layers.Dense(NUMBER_OF_TARGET, activation=hparams[HP_ACTIVATION_OUTPUT])(x)
    model = tf.keras.Model(all_inputs,output)
    return model



In [54]:
def train_test_model(hparams):
    model = modeling(hparams)
    model.summary()
    model.compile(
        optimizer = hparams[HP_OPTIMIZER],
        loss = tf.keras.losses.BinaryCrossentropy(),
        metrics = ["binary_accuracy","binary_crossentropy","mean_squared_error"],
    )
    model.fit(
        train, 
        validation_data= val,
        epochs=30,
        shuffle=True,
        verbose =1,
        callbacks=[ 
            tf.keras.callbacks.TensorBoard(log_dir = logdir),  # log metrics
            hp.KerasCallback(logdir, hparams),  # log hparams
            tf.keras.callbacks.EarlyStopping(monitor='val_binary_accuracy', patience=10),
        ]
    ) 
    model.save("tensorboard/"+str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) + "/model")
    _, binary_accuracy, binary_crossentropy, mean_squared_error = model.evaluate(test)
    return binary_accuracy, binary_crossentropy, mean_squared_error

### Define a method to run the the training and testing model function and logs the paramete

In [55]:
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        binary_accuracy, binary_crossentropy, mean_squared_error = train_test_model(hparams)
        tf.summary.scalar(METRIC_BINARY_ACCURACY, binary_accuracy, step=1)
        tf.summary.scalar(METRIC_BINARY_CROSSENTROPY, binary_crossentropy, step=1)
        tf.summary.scalar(METRIC_MSE, mean_squared_error, step=1)

### Tunning the model

In [56]:
session_num = 0
 target
for num_units_1 in HP_NUM_UNITS_1.domain.values:
  for num_units_2 in HP_NUM_UNITS_2.domain.values:
      for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
        for optimizer in HP_OPTIMIZER.domain.values:
          for activation in HP_ACTIVATION.domain.values:
            for activation_output in HP_ACTIVATION_OUTPUT.domain.values:
              hparams = {
                HP_NUM_UNITS_1: num_units_1,
                HP_NUM_UNITS_2: num_units_2,
                HP_DROPOUT : dropout_rate,
                HP_OPTIMIZER: optimizer,
                HP_ACTIVATION: activation,
                HP_ACTIVATION_OUTPUT: activation_output
              }
              run_name = "run-%d" % session_num
              print('--- Starting trial: %s' % run_name)
              print({h.name: hparams[h] for h in hparams})
              run(logdir + run_name, hparams)
              session_num += 1          

--- Starting trial: run-0
{'num_units_1': 32, 'num_units_2': 512, 'dropout': 0.5, 'optimizer': 'adam', 'activation': 'relu', 'activation_output': 'sigmoid'}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
ear (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
ear_l (InputLayer)              [(None, 1)]          0                                            
__________________________________________________________________________________________________
ear_r (InputLayer)              [(None, 1)]          0                                            
__________________________________________________________________________________________________
normalization (Normalization)

ValueError: in user code:

    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:789 run_step  **
        outputs = model.train_step(data)
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:748 train_step
        loss = self.compiled_loss(
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:183 __call__
        y_true = self._conform_to_outputs(y_pred, y_true)
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:63 _conform_to_outputs
        struct = map_to_output_names(outputs, self._output_names, struct)
    /home/simeon/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:586 map_to_output_names
        raise ValueError('Found unexpected keys that do not correspond '

    ValueError: Found unexpected keys that do not correspond to any Model output: dict_keys(['Fatigue', 'Awake']). Expected: ['dense_3']


In [1]:
 %load_ext tensorboard
 %tensorboard --logdir 'tensorboard/logs/save_log' --port=8080

In [1]:
!jupyter nbconvert --to script ann_fatiuge_test.ipynb

[NbConvertApp] Converting notebook ann_fatiuge_test.ipynb to script
[NbConvertApp] Writing 8864 bytes to ann_fatiuge_test.py
