# Exercise 3.6 - Simple Classification using TensorFlow

1. Import all required modules and print the TensorFlow version:

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

from  IPython import display
from matplotlib import pyplot as plt
from scipy.ndimage.filters import gaussian_filter1d
import pandas as pd
import numpy as np

import tensorflow as tf

# Report the installed TensorFlow version so that results can be tied to a specific release.
print("TensorFlow version: {}".format(tf.__version__)) 

2. Dataset import and data preparation

In [None]:
# Fetch the gzipped HIGGS sample via the Keras file helper and keep the path it returns.
higgs_path = tf.keras.utils.get_file(
    fname='HIGGSSmall.csv.gz',
    origin='https://github.com/PacktWorkshops/The-Reinforcement-Learning-Workshop/blob/master/Chapter03/Dataset/HIGGSSmall.csv.gz?raw=true',
)

    a) Read CSV dataset into TensorFlow Dataset class and repack it to have tuples (features, labels)

In [None]:
# Number of rows assigned to each split.
N_VALIDATION = 1000
N_TEST = 1000
N_TRAIN = 10000

# Number of feature columns per row (the label column is counted separately).
N_FEATURES = 28

# Batching / shuffling configuration; the shuffle buffer spans the whole training split.
BATCH_SIZE = 500
BUFFER_SIZE = N_TRAIN
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE

# Stream the gzipped CSV; every column (one label + N_FEATURES features) is parsed as a float.
ds = tf.data.experimental.CsvDataset(
    filenames=higgs_path,
    record_defaults=[0.0] * (N_FEATURES + 1),
    compression_type="GZIP",
)

def pack_row(*row):
    """Repack one batch of CSV columns into a ``(features, label)`` pair.

    Args:
        *row: The parsed CSV columns; the first one is the label and the
            remaining ones are the feature columns.

    Returns:
        A tuple ``(features, label)`` where ``features`` is the feature
        columns stacked along axis 1 and ``label`` is the first column.
    """
    label, *feature_columns = row
    # Stacking along axis 1 requires each column to already have a leading
    # (batch) dimension, so this is meant to be mapped over a batched dataset.
    features = tf.stack(feature_columns, axis=1)
    return features, label

# Batch first so pack_row stacks whole columns at once, then unbatch again
# to get back a dataset of individual (features, label) examples.
packed_ds = (
    ds.batch(N_TRAIN)
      .map(pack_row)
      .unbatch()
)

    b) Take a look at the distribution of the feature values

In [None]:
# Inspect a single batch of 1000 examples: print the first feature vector and
# histogram every feature value in the batch together.
first_batch = packed_ds.batch(1000).take(1)
for features, label in first_batch:
    print(features[0])
    all_values = features.numpy().flatten()
    plt.hist(all_values, bins=101)

    c) Create training, validation and test sets

In [None]:
# Consecutive, non-overlapping slices of the packed dataset:
# validation rows first, then test rows, then training rows.
validate_ds = packed_ds.take(N_VALIDATION).cache()

rows_after_validation = packed_ds.skip(N_VALIDATION)
test_ds = rows_after_validation.take(N_TEST).cache()
train_ds = rows_after_validation.skip(N_TEST).take(N_TRAIN).cache()

    d) Define features, label and class names

In [None]:
# The feature list is assembled from its logical groups, in column order.
_lepton_features = ["lepton pT", "lepton eta", "lepton phi"]
_missing_energy_features = ["missing energy magnitude", "missing energy phi"]
# Four jets, each described by the same four quantities.
_jet_features = ["jet {} {}".format(jet, quantity)
                 for jet in range(1, 5)
                 for quantity in ("pt", "eta", "phi", "b-tag")]
_mass_features = ["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]

feature_names = (_lepton_features + _missing_energy_features
                 + _jet_features + _mass_features)
label_name = ['Measure']
class_names = ['Signal', 'Background']

for template, names in (("Features: {}", feature_names),
                        ("Label: {}", label_name),
                        ("Class names: {}", class_names)):
    print(template.format(names))

    e) Show a sample of a training instance features and label

In [None]:
# Pull the first element of the (still unbatched) training set and show it.
train_iterator = iter(train_ds)
features, labels = next(train_iterator)

for heading, tensor in (("Features =", features), ("Labels =", labels)):
    print(heading)
    print(tensor.numpy())

    f) Assign batch size to datasets

In [None]:
# Each name is rebound to its batched version, so re-running this cell
# without re-creating the splits would batch the datasets a second time.
train_ds = (
    train_ds
    .shuffle(BUFFER_SIZE)
    .repeat()
    .batch(BATCH_SIZE)
)
validate_ds = validate_ds.batch(BATCH_SIZE)
test_ds = test_ds.batch(BATCH_SIZE)

3. Model creation and training

    a) Create a decaying learning rate

In [None]:
# Inverse-time decay starting at 1e-3; decay_steps is expressed as
# 1000 epochs' worth of optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=STEPS_PER_EPOCH * 1000,
    decay_rate=1,
    staircase=False,
)

    b) Define a function to compile a model with
        - Adam optimizer
        - Binary cross entropy as loss function
        
       and fit it on the training data, using early stopping on the validation dataset

In [None]:
def compile_and_fit(model, name, max_epochs=3000):
    """Compile ``model`` with Adam and binary cross-entropy, then train it.

    Training reads the module-level ``train_ds``, ``validate_ds``,
    ``STEPS_PER_EPOCH`` and ``lr_schedule`` objects, and stops early once
    ``val_binary_crossentropy`` has not improved for 200 epochs.

    Args:
        model: The Keras model to compile and fit. It is expected to output
            logits, since both the loss and the cross-entropy metric are
            configured with ``from_logits=True``.
        name: Identifier for the run. Currently unused; kept so existing
            callers keep working.
        max_epochs: Upper bound on the number of training epochs.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    optimizer = tf.keras.optimizers.Adam(lr_schedule)

    # Track cross-entropy with the dedicated *metric* class rather than a
    # loss object; the explicit name keeps the history keys
    # 'binary_crossentropy' / 'val_binary_crossentropy' unchanged.
    # NOTE(review): the model outputs logits, so the string 'accuracy' metric
    # presumably thresholds raw logits rather than probabilities -- confirm
    # before comparing it with a sigmoid-based accuracy.
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=[
                    tf.keras.metrics.BinaryCrossentropy(
                        from_logits=True, name='binary_crossentropy'),
                    'accuracy'])

    model.summary()

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_binary_crossentropy', patience=200)

    # train_ds repeats indefinitely, so the epoch length has to be given explicitly.
    history = model.fit(train_ds,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        epochs=max_epochs,
                        validation_data=validate_ds,
                        callbacks=[early_stopping],
                        verbose=2)
    return history

    c) Create a small model with just 2 layers, with 16 and 1 neurons respectively, then compile and fit it

In [None]:
# Smallest network: one 16-unit ELU hidden layer followed by a single logit output.
hidden_layer = tf.keras.layers.Dense(16, activation='elu', input_shape=(N_FEATURES,))
logit_layer = tf.keras.layers.Dense(1)
small_model = tf.keras.Sequential([hidden_layer, logit_layer])

# Training histories of every model variant, keyed by a short model name.
size_histories = {}

size_histories['small'] = compile_and_fit(small_model, 'sizes/small')

    and check its performance on the test set

In [None]:
# Accumulate accuracy over every batch of the test set.
test_accuracy = tf.keras.metrics.Accuracy()

for features, labels in test_ds:
    # Run the model in inference mode; it returns one logit per example.
    logits = small_model(features, training=False)
    probabilities = tf.keras.activations.sigmoid(logits)
    # Drop the trailing unit dimension so predictions line up one-to-one with
    # the labels, then threshold the probabilities at 0.5.
    predictions = tf.cast(tf.squeeze(probabilities, axis=-1) > 0.5, labels.dtype)
    # Keras metrics expect (y_true, y_pred) in that order.
    test_accuracy.update_state(labels, predictions)

# Read the aggregate once, after all batches have been consumed, so the name
# is defined even if the test set yields no batches.
small_model_accuracy = test_accuracy.result()

print("Test set accuracy: {:.3%}".format(small_model_accuracy))

    d) Create a large model with 5 layers (four with 512 neurons each and a final one with 1 neuron), then compile and fit it

In [None]:
# Four 512-unit ELU layers (only the first declares the input shape),
# followed by a single logit output.
large_layers = [tf.keras.layers.Dense(512, activation='elu', input_shape=(N_FEATURES,))]
large_layers += [tf.keras.layers.Dense(512, activation='elu') for _ in range(3)]
large_layers.append(tf.keras.layers.Dense(1))

large_model = tf.keras.Sequential(large_layers)

size_histories['large'] = compile_and_fit(large_model, "sizes/large")

    and check its performance on the test set

In [None]:
# Accumulate accuracy over every batch of the test set.
test_accuracy = tf.keras.metrics.Accuracy()

for features, labels in test_ds:
    # Run the model in inference mode; it returns one logit per example.
    logits = large_model(features, training=False)
    probabilities = tf.keras.activations.sigmoid(logits)
    # Drop the trailing unit dimension so predictions line up one-to-one with
    # the labels, then threshold the probabilities at 0.5.
    predictions = tf.cast(tf.squeeze(probabilities, axis=-1) > 0.5, labels.dtype)
    # Keras metrics expect (y_true, y_pred) in that order.
    test_accuracy.update_state(labels, predictions)

# Read the aggregate once, after all batches have been consumed, so the name
# is defined even if the test set yields no batches.
large_model_accuracy = test_accuracy.result()

print("Test set accuracy: {:.3%}".format(large_model_accuracy))

    e) Create the same large model as before, but add regularization techniques such as L2 regularization and Dropout, then compile and fit it

In [None]:
# Same depth and width as the large model, but every hidden Dense layer has an
# L2 kernel penalty and is followed by 50% dropout.
regularized_layers = []
for layer_index in range(4):
    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (N_FEATURES,)} if layer_index == 0 else {}
    regularized_layers.append(
        tf.keras.layers.Dense(512,
                              kernel_regularizer=tf.keras.regularizers.l2(0.0001),
                              activation='elu',
                              **first_layer_kwargs))
    regularized_layers.append(tf.keras.layers.Dropout(0.5))
regularized_layers.append(tf.keras.layers.Dense(1))

regularization_model = tf.keras.Sequential(regularized_layers)

size_histories['regularization'] = compile_and_fit(
    regularization_model, "regularizers/regularization", max_epochs=9000)

    and check its performance on the test set

In [None]:
# Accumulate accuracy over every batch of the test set.
test_accuracy = tf.keras.metrics.Accuracy()

for features, labels in test_ds:
    # Run the model in inference mode (this model contains Dropout layers);
    # it returns one logit per example.
    logits = regularization_model(features, training=False)
    probabilities = tf.keras.activations.sigmoid(logits)
    # Drop the trailing unit dimension so predictions line up one-to-one with
    # the labels, then threshold the probabilities at 0.5.
    predictions = tf.cast(tf.squeeze(probabilities, axis=-1) > 0.5, labels.dtype)
    # Keras metrics expect (y_true, y_pred) in that order.
    test_accuracy.update_state(labels, predictions)

# Read the aggregate once, after all batches have been consumed, so the name
# is defined even if the test set yields no batches.
regularization_model_accuracy = test_accuracy.result()

print("Test set accuracy: {:.3%}".format(regularization_model_accuracy))

    f) Compare the binary cross-entropy trend of the three models over epochs

In [None]:
def _history_frame(history):
    """Return a Keras ``History`` as a DataFrame with an added 'epoch' column."""
    frame = pd.DataFrame(history.history)
    frame['epoch'] = history.epoch
    return frame


# One DataFrame per model; these names are reused by the accuracy plot.
histSmall = _history_frame(size_histories["small"])
histLarge = _history_frame(size_histories["large"])
histReg = _history_frame(size_histories["regularization"])

# Plot the smoothed training (solid) and validation (dashed) cross-entropy of
# every model. Labels are attached to each curve so the legend cannot drift
# out of sync with the plotting order.
for model_label, hist in (("Small", histSmall),
                          ("Large", histLarge),
                          ("Regularization", histReg)):
    train_smooth = gaussian_filter1d(hist['binary_crossentropy'], sigma=3)
    val_smooth = gaussian_filter1d(hist['val_binary_crossentropy'], sigma=3)
    plt.plot(hist['epoch'], train_smooth, '-', label=model_label + " Training")
    plt.plot(hist['epoch'], val_smooth, '--', label=model_label + " Validation")

plt.ylim([0.5, 0.7])
plt.xlabel('Epoch')
plt.ylabel('Binary Crossentropy')
plt.legend();

    g) Compare the accuracy trend of the three models over epochs

In [None]:
# Plot the smoothed training (solid) and validation (dashed) accuracy of every
# model. Labels are attached to each curve so the legend cannot drift out of
# sync with the plotting order.
for model_label, hist in (("Small", histSmall),
                          ("Large", histLarge),
                          ("Regularization", histReg)):
    train_smooth = gaussian_filter1d(hist['accuracy'], sigma=6)
    val_smooth = gaussian_filter1d(hist['val_accuracy'], sigma=6)
    plt.plot(hist['epoch'], train_smooth, '-', label=model_label + " Training")
    plt.plot(hist['epoch'], val_smooth, '--', label=model_label + " Validation")

plt.ylim([0.5, 0.75])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend();