In [1]:
import datetime
import json

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from tensorflow import feature_column
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing


## Load Data and Create Dataset

In [13]:
# Load the CSV (its first column is used as the index) and print a quick
# overview: column dtypes, summary statistics and the first five rows.
df = pd.read_csv("data/stage_data_out/dataset/dataset_ann/ear_test.csv", index_col=0)
for overview in (df.dtypes, df.describe(), df.head(5)):
    print(overview)

t_1       float64
t_2       float64
t_3       float64
t_4       float64
t_5       float64
t_6       float64
t_7       float64
t_8       float64
t_9       float64
t_10      float64
target      int64
dtype: object
                t_1           t_2           t_3           t_4           t_5  \
count  12223.000000  12223.000000  12223.000000  12223.000000  12223.000000   
mean       0.316937      0.316948      0.316949      0.316958      0.316948   
std        0.024264      0.024234      0.024221      0.024199      0.024177   
min        0.180592      0.180592      0.180592      0.180592      0.180592   
25%        0.304438      0.304438      0.304414      0.304438      0.304418   
50%        0.319253      0.319253      0.319280      0.319319      0.319298   
75%        0.333979      0.333977      0.333979      0.333972      0.333962   
max        0.401511      0.400485      0.400485      0.400249      0.400249   

                t_6           t_7           t_8           t_9          t_10 

In [3]:
# Separate the label from the features WITHOUT mutating `df`: the previous
# `df.pop('target')` removed the column in place, so running this cell a second
# time raised KeyError. `df` now keeps its `target` column.
target = df['target']
feature_frame = df.drop(columns=['target'])

# One dataset element = (dict of per-column scalar features, scalar label).
dataset = tf.data.Dataset.from_tensor_slices((dict(feature_frame), target.values))


In [4]:
# Peek at the first (features, label) element of the dataset.
for feature_batch, label_batch in dataset.take(1):
  feature_names = list(feature_batch)  # iterating a dict yields its keys
  first_feature = feature_batch['t_1']
  print('Every feature:', feature_names)
  print('A batch of t_1:', first_feature)
  print('A batch of targets:', label_batch )

Every feature: ['t_1', 't_2', 't_3', 't_4', 't_5', 't_6', 't_7', 't_8', 't_9', 't_10']
A batch of t_1: tf.Tensor(0.4015111547859027, shape=(), dtype=float64)
A batch of targets: tf.Tensor(0, shape=(), dtype=int64)


## Splitting, Shuffling, Batching data

### Splitting and Shuffling

In [5]:
# Fixed seed so the split is reproducible between runs.
SHUFFLE_SEED = 42

# Count the elements by folding over the dataset once.
dataset_size = dataset.reduce(0, lambda x, _: x + 1).numpy()

# Shuffle ONCE and keep that order for every later iteration. With the default
# reshuffle_each_iteration=True each of train/val/test would see a different
# permutation, so skip/take could not produce disjoint subsets.
dataset = dataset.shuffle(
    buffer_size = dataset_size,
    seed = SHUFFLE_SEED,
    reshuffle_each_iteration = False,
)

train_size = int(0.7*dataset_size)
val_size = int(0.15*dataset_size)
test_size = int(0.15*dataset_size)

# Consecutive, non-overlapping slices of the shuffled dataset. skip() and
# take() must be chained: previously the result of skip() was overwritten by a
# take() on the full dataset, so val and test were drawn from the start of the
# data and overlapped the training set.
train = dataset.take(train_size)
val = dataset.skip(train_size).take(val_size)
test = dataset.skip(train_size + val_size).take(test_size)

# Re-count each split to verify the sizes actually obtained.
train_size = train.reduce(0, lambda x, _: x + 1).numpy()
val_size = val.reduce(0, lambda x, _: x + 1).numpy()
test_size = test.reduce(0, lambda x, _: x + 1).numpy()

print("Full dataset size:", dataset_size)
print("Train dataset size:", train_size)
print("Val dataset size:", val_size)
print("Test dataset size:", test_size)

Full dataset size: 12223
Train dataset size: 8556
Val dataset size: 1833
Test dataset size: 1833


### Shuffling, Batching

In [6]:
# Number of elements per batch for all three splits.
BATCH_SIZE = 32

# Train and validation are shuffled over their full length before batching;
# the test split is only batched.
train = train.shuffle(buffer_size = train_size).batch(BATCH_SIZE)
val = val.shuffle(buffer_size = val_size).batch(BATCH_SIZE)
test = test.batch(BATCH_SIZE)

## Feature Engineering

In [7]:
# Features of the first training batch (the label half of the pair is unused).
example_batch, _example_labels = next(iter(train))


In [11]:
def demo(feature_column):
  """Print the result of applying a DenseFeatures layer to `example_batch`.

  Args:
    feature_column: feature column(s) handed to `layers.DenseFeatures`.
      Note that this parameter name shadows the imported `feature_column`
      module inside the function body.
  """
  dense_output = layers.DenseFeatures(feature_column)(example_batch)
  print(dense_output.numpy())

# Only needed if the features are on different scales.
def normalize_numerical_features(df, features):
  """Build a z-score-normalised numeric feature column for every feature.

  The previous version returned from inside the loop, so only the first name
  in `features` was ever processed. One column per name is now returned.

  Args:
    df: DataFrame whose columns provide the mean and standard deviation.
    features: iterable of column names present in `df`.

  Returns:
    A list of `tf.feature_column.numeric_column` objects, in the order of
    `features`, each with a normalizer computing `(value - mean) / std`.
  """
  def _make_z_score(mean, std):
    # A factory binds mean/std per column; a closure defined directly in the
    # loop would see only the statistics of the last column.
    def z_score(col):
      return (col - mean)/std
    return z_score

  return [
    tf.feature_column.numeric_column(
      column,
      normalizer_fn=_make_z_score(df[column].mean(), df[column].std()),
    )
    for column in features
  ]
  
def get_normalization_layer(name, dataset):
  """Return a Normalization layer adapted to a single feature.

  Args:
    name: key of the feature inside each element's feature dict.
    dataset: dataset yielding `(features, label)` pairs.

  Returns:
    A `preprocessing.Normalization` layer whose statistics were learned from
    the values of feature `name` in `dataset`.
  """
  # Keep only the requested feature; the label is dropped.
  single_feature_ds = dataset.map(lambda features, _label: features[name])

  norm_layer = preprocessing.Normalization()
  norm_layer.adapt(single_feature_ds)
  return norm_layer

def make_numerical_feature_col(numerical_column, normalize = False):
    """Create a Keras input, and its encoded form, for each numerical feature.

    The results are appended to the module-level lists `all_inputs` and
    `encoded_features`, which must exist before this function is called.
    Those same lists are returned.

    Args:
        numerical_column: iterable of feature names; each becomes the name of
            a `tf.keras.Input` of shape `(1,)`.
        normalize: when True, each input is passed through a Normalization
            layer adapted on the module-level `train` dataset. When False the
            raw input is used unchanged.

    Returns:
        The tuple `(all_inputs, encoded_features)`.
    """
    for column_name in numerical_column:
        numeric_col = tf.keras.Input(shape=(1,), name=column_name)
        if normalize:
            normalization_layer = get_normalization_layer(column_name, train)
            encoded_numeric_col = normalization_layer(numeric_col)
        else:
            # Pass the symbolic input straight through. The previous code
            # stored a tf.feature_column object here, which is not a Keras
            # tensor and cannot be fed to the functional model built from
            # `encoded_features`.
            encoded_numeric_col = numeric_col
        all_inputs.append(numeric_col)
        encoded_features.append(encoded_numeric_col)
    return all_inputs, encoded_features


In [12]:
# Start from empty accumulators: make_numerical_feature_col appends to these
# module-level lists and hands them back.
all_inputs, encoded_features = [], []

# Column names t_1 ... t_10.
numerical_features = ["t_%d" % position for position in range(1, 11)]
all_inputs, encoded_features = make_numerical_feature_col(numerical_features, normalize = True)

## Model

## Hyperparameter tuning (HParams & TensorBoard)

### Define log dir

In [None]:
# Root directory for this tuning session, made unique by a timestamp so that
# repeated sessions do not write into each other's logs.
# Requires `import datetime` in the imports cell (it was previously missing,
# so this line raised NameError on a fresh kernel).
logdir = f"tensorboard/logs/fit/tunning/{datetime.datetime.now():%Y%m%d-%H%M%S}/"

### Define model parameter

In [None]:
# --- Hyperparameter search space -------------------------------------------
# Widths of the two hidden Dense layers.
HP_NUM_UNITS_1 = hp.HParam('num_units_1', hp.Discrete([256, 512]))
HP_NUM_UNITS_2 = hp.HParam('num_units_2', hp.Discrete([256, 512]))
# Dropout rate, declared as a continuous interval.
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.2, 0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam']))
# Activation of the hidden layers and of the output layer.
HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['relu', 'elu']))
HP_ACTIVATION_OUTPUT = hp.HParam('activation_output', hp.Discrete(['sigmoid']))

# --- Metric names (used both for Keras and as TensorBoard tags) -------------
METRIC_CATEGORICAL_ACCURACY = "categorical_accuracy"
METRIC_BINARY_ACCURACY = "binary_accuracy"
METRIC_CATEGORICAL_CROSSENTROPY = "categorical_crossentropy"
METRIC_BINARY_CROSSENTROPY = "binary_crossentropy"
METRIC_MSE = "mean_squared_error"

# Number of units in the output layer.
# NOTE(review): the labels are scalar ints and the loss is binary
# cross-entropy, so a single output unit may be what is intended - confirm.
NUMBER_OF_TARGET = 2

metrics = [
    METRIC_CATEGORICAL_ACCURACY,
    METRIC_BINARY_ACCURACY,
    METRIC_CATEGORICAL_CROSSENTROPY,
    METRIC_BINARY_CROSSENTROPY,
    METRIC_MSE,
]


### Initialize hyper parameter for the log

In [None]:
# Hyperparameters and metrics that the HParams dashboard should display.
hparam_space = [
    HP_NUM_UNITS_1,
    HP_NUM_UNITS_2,
    HP_DROPOUT,
    HP_ACTIVATION,
    HP_ACTIVATION_OUTPUT,
    HP_OPTIMIZER,
]
tracked_metrics = [
    hp.Metric(METRIC_CATEGORICAL_ACCURACY, display_name='Categorical Accuracy'),
    hp.Metric(METRIC_BINARY_ACCURACY, display_name='Binary Accuracy'),
    hp.Metric(METRIC_CATEGORICAL_CROSSENTROPY, display_name='Categorical Cross Entropy Accuracy'),
    hp.Metric(METRIC_BINARY_CROSSENTROPY, display_name='Binary Cross Entropy'),
    hp.Metric(METRIC_MSE, display_name='MSE'),
]

# Write the experiment-level configuration once, at the root log directory.
with tf.summary.create_file_writer(logdir).as_default():
    hp.hparams_config(hparams=hparam_space, metrics=tracked_metrics)

### Define the model

In [None]:
def model(all_features, hparams):
    
    x = tf.keras.layers.BatchNormalization()(all_features)
    x = tf.keras.layers.Dense(hparams[HP_NUM_UNITS_1],activation=hparams[HP_ACTIVATION])(x)
    x = tf.keras.layers.Dropout(hparams[HP_DROPOUT])(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(hparams[HP_NUM_UNITS_2],activation=hparams[HP_ACTIVATION])(x)
    x = tf.keras.layers.Dropout(hparams[HP_DROPOUT])(x)
    x = tf.keras.layers.BatchNormalization()(x)

    output = tf.keras.layers.Dense(NUMBER_OF_TARGET, activation=hparams[HP_ACTIVATION_OUTPUT])(x)
    model = tf.keras.Model(all_inputs,output)



In [14]:
def train_test_model(all_features, hparams):
    model(all_features, hparams)
  
    model.compile(
        optimizer = hparams[HP_OPTIMIZER],
        loss = tf.keras.losses.BinaryCrossentropy(),
        metrics = ["categorical_accuracy","binary_accuracy","categorical_crossentropy","binary_crossentropy","mean_squared_error"],
    )
    model.fit(
        train, 
        validation_data= val,
        epochs=10,
        shuffle=True,
        verbose =1,
        callbacks=[ 
            tf.keras.callbacks.TensorBoard(logdir),  # log metrics
            hp.KerasCallback(logdir, hparams),  # log hparams
            tf.keras.callbacks.EarlyStopping(monitor='val_binary_crossentropy', patience=10),
        ]
    ) 
    _, categorical_accuracy, binary_accuracy, categorical_crossentropy, binary_crossentropy, mean_squared_error = model.evaluate(test)
    return categorical_accuracy, binary_accuracy, categorical_crossentropy, binary_crossentropy, mean_squared_error

### Define a method that runs the training and testing function and logs the parameters

In [None]:
def run(run_dir, hparams):
    """Execute one trial and record its hyperparameters and test metrics.

    Args:
        run_dir: directory that receives this trial's summaries.
        hparams: mapping from the HP_* objects to the values for this trial.
    """
    # train_test_model needs the merged feature tensor as its first argument.
    # It was previously called with `hparams` only, which raised TypeError.
    # The tensor is built from the module-level `encoded_features` list.
    all_features = layers.concatenate(encoded_features)

    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        scores = train_test_model(all_features, hparams)

        # Tags in the same order as the values returned by train_test_model.
        metric_tags = (
            METRIC_CATEGORICAL_ACCURACY,
            METRIC_BINARY_ACCURACY,
            METRIC_CATEGORICAL_CROSSENTROPY,
            METRIC_BINARY_CROSSENTROPY,
            METRIC_MSE,
        )
        for tag, value in zip(metric_tags, scores):
            tf.summary.scalar(tag, value, step=1)

### Tuning the model

In [None]:
# Grid search: one trial per combination of the values below.
session_num = 0

# Only the two end points of the dropout interval are tried.
dropout_candidates = (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value)

for num_units_1 in HP_NUM_UNITS_1.domain.values:
  for num_units_2 in HP_NUM_UNITS_2.domain.values:
    for dropout_rate in dropout_candidates:
      for optimizer in HP_OPTIMIZER.domain.values:
        for activation in HP_ACTIVATION.domain.values:
          for activation_output in HP_ACTIVATION_OUTPUT.domain.values:
            hparams = {
              HP_NUM_UNITS_1: num_units_1,
              HP_NUM_UNITS_2: num_units_2,
              HP_DROPOUT: dropout_rate,
              HP_OPTIMIZER: optimizer,
              HP_ACTIVATION: activation,
              HP_ACTIVATION_OUTPUT: activation_output,
            }
            run_name = "run-%d" % session_num
            print('--- Starting trial: %s' % run_name)
            # Show the trial's settings keyed by hyperparameter name.
            print({h.name: value for h, value in hparams.items()})
            # Each trial logs to its own sub-directory of the session logdir.
            run(logdir + run_name, hparams)
            session_num = session_num + 1

In [None]:
%load_ext tensorboard
%tensorboard --logdir 'logs/fit'