In [13]:
import tensorflow as tf 
import pandas as pd 
import io
import itertools
import numpy as np 
import json
from tensorflow import feature_column
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorboard.plugins.hparams import api as hp
import matplotlib.pyplot as plt
import sklearn.metrics
import datetime


In [5]:
df = pd.read_csv("data/stage_data_out/dataset/Merge_Dataset/Merge_Dataset.csv", index_col=0)
print(df.dtypes)
print(df.describe())
print(df.head(5))

ear_l     float64
ear_r     float64
ear       float64
Target      int64
dtype: object
               ear_l          ear_r            ear         Target
count  116069.000000  116069.000000  116069.000000  116069.000000
mean        0.305900       0.304856       0.305378       0.732986
std         0.033780       0.031586       0.031354       0.442401
min         0.112955       0.093567       0.105054       0.000000
25%         0.286097       0.290172       0.289110       0.000000
50%         0.311300       0.310224       0.312204       1.000000
75%         0.329276       0.325553       0.326692       1.000000
max         0.440620       0.500000       0.427168       1.000000
          ear_l     ear_r       ear  Target
frame                                      
0.0    0.282051  0.289474  0.285762       0
1.0    0.294775  0.310811  0.302793       0
2.0    0.294872  0.281959  0.288415       0
3.0    0.269142  0.289374  0.279258       0
4.0    0.277569  0.276817  0.277193       0


In [6]:
target = df.pop('Target')
dataset = tf.data.Dataset.from_tensor_slices((dict(df), target.values))
print(dataset)


<TensorSliceDataset shapes: ({ear_l: (), ear_r: (), ear: ()}, ()), types: ({ear_l: tf.float64, ear_r: tf.float64, ear: tf.float64}, tf.int64)>


In [7]:
for feature_batch, label_batch in dataset.take(1):
  print('Every feature:', list(feature_batch.keys()))
  print('A batch of ear:', feature_batch['ear'])
  print('A batch of targets:', label_batch )

Every feature: ['ear_l', 'ear_r', 'ear']
A batch of ear: tf.Tensor(0.2857624831309042, shape=(), dtype=float64)
A batch of targets: tf.Tensor(0, shape=(), dtype=int64)


In [8]:
dataset_size = dataset.reduce(0, lambda x, _: x + 1).numpy()
dataset = dataset.shuffle(buffer_size = dataset_size)

train_size = int(0.7*dataset_size)
val_size = int(0.15*dataset_size)
test_size = int(0.15*dataset_size)

train = dataset.take(train_size)
val = dataset.skip(train_size)
val = dataset.take(val_size)
test = dataset.skip(train_size + val_size)
test = dataset.take(test_size)

train_size = train.reduce(0, lambda x, _: x + 1).numpy()
val_size = val.reduce(0, lambda x, _: x + 1).numpy()
test_size = test.reduce(0, lambda x, _: x + 1).numpy()

print("Full dataset size:", dataset_size)
print("Train dataset size:", train_size)
print("Val dataset size:", val_size)
print("Test dataset size:", test_size)

Full dataset size: 116069
Train dataset size: 81248
Val dataset size: 17410
Test dataset size: 17410


In [9]:
BATCH_SIZE = 32

train = train.shuffle(buffer_size = train_size)
train = train.batch(BATCH_SIZE)

val = val.shuffle(buffer_size = val_size)
val = val.batch(BATCH_SIZE)

test = test.batch(BATCH_SIZE)

In [10]:
example_batch = next(iter(train))[0]

In [11]:
def demo(feature_column):
  feature_layer = layers.DenseFeatures(feature_column)
  print(feature_layer(example_batch).numpy())

# POnly if we have features with different scale
def normalize_numerical_features(df, features):
  def get_mean_std(x):
    return df[x].mean(), df[x].std()
  for column in features: 
    mean, std = get_mean_std(column)
    def z_score(col):
      return (col - mean)/std    
    def _numeric_column_normalized(column_name, normalizer_fn):
      return tf.feature_column.numeric_column(column_name, normalizer_fn=normalizer_fn)
    return _numeric_column_normalized(column,z_score)
  


def make_numerical_feature_col(numerical_column, normalize = False):
  def get_normalization_layer(name, dataset):
  # Create a Normalization layer for our feature.
  normalizer = preprocessing.Normalization()
  # Prepare a Dataset that only yields our feature.
  feature_ds = dataset.map(lambda x, y: x[name])
  # Learn the statistics of the data.
  normalizer.adapt(feature_ds)
  return normalizer
    for column_name in numerical_column:
        numeric_col = tf.keras.Input(shape=(1,), name=column_name)
        if normalize : 
            normalization_layer = get_normalization_layer(column_name, train)
            encoded_numeric_col = normalization_layer(numeric_col) 
        else : 
            encoded_numeric_col = feature_column.numeric_column(column_name)
        all_inputs.append(numeric_col)
        encoded_features.append(encoded_numeric_col)
    return all_inputs, encoded_features


In [14]:
all_inputs = []
encoded_features = []
numerical_features = ["ear","ear_l","ear_r"]
all_inputs, encoded_features = make_numerical_feature_col(numerical_features, normalize = True)

In [15]:
all_features = []
all_features = tf.keras.layers.concatenate(encoded_features)

In [22]:
model = tf.keras.models.Sequential([
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(512, activation = "relu"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(2, activation="sigmoid"),
])



In [None]:
model.compile(optimizer='adam',
              loss='binary_accuracy',
              metrics=['accuracy'])
model.fit(
        train, 
        validation_data= val,
        epochs=30,
        shuffle=True,
        verbose =1,
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')]) 
model.save("tensorboard/model/"+str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) + "/model_" + str(session_num))
