In [None]:
%matplotlib inline
import numpy as np
import tensorflow as tf
import os
import time
import pickle
import warnings
import data_normalization
from copy import deepcopy
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from sklearn.metrics import confusion_matrix
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

In [None]:
print(tf.__version__)

In [None]:
print("GPU Available: ", tf.test.is_gpu_available())
tf.debugging.set_log_device_placement(True)

In [None]:
warnings.filterwarnings('ignore')
tf.keras.backend.clear_session()  # For easy reset of notebook state.
np.set_printoptions(suppress=True, linewidth=120, precision=2)

In [5]:
def perf_measure(y_true, y_pred):
    
    cnf_matrix = confusion_matrix(y_true, y_pred)
    
    FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)  
    FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
    TP = np.diag(cnf_matrix)
    TN = cnf_matrix.sum() - (FP + FN + TP)

    FP = FP.astype(float)
    FN = FN.astype(float)
    TP = TP.astype(float)
    TN = TN.astype(float)

    # Specificity or true negative rate
    TNR = TN/(TN+FP) 
    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP/(TP+FN)
    # Precision or positive predictive value
    PPV = TP/(TP+FP)
    # Negative predictive value
    NPV = TN/(TN+FN)
    # Fall out or false positive rate
    FPR = FP/(FP+TN)
    # False negative rate
    FNR = FN/(TP+FN)
    # False discovery rate
    FDR = FP/(TP+FP)
    # Overall accuracy
    ACC = (TP+TN)/(TP+FP+FN+TN)
    
    FSCORE = np.divide((2*PPV*TPR), (PPV+TPR))
    
    return PPV, TPR, FSCORE, FNR, FPR, TNR

#### Load and Visulize Y with Quantatitve features

In [6]:
name_of_particle = 'JetHTs'

X_train = np.load("matrices/" + name_of_particle +"_train.npy",)
y_train = np.load("matrices/" + name_of_particle +"_y_train.npy",)
X_val = np.load("matrices/" + name_of_particle +"_val.npy",)
y_val = np.load("matrices/" + name_of_particle +"_y_val.npy",)
X_test = np.load("matrices/" + name_of_particle +"_test.npy",)
y_test = np.load("matrices/" + name_of_particle +"_y_test.npy",)
X_train = X_train[:, :-3]
X_val = X_val[:, :-3]
X_test = X_test[:, :-3]
_, V = X_train.shape
K = 2
V

In [7]:
X_test.shape

### Spiliting Data and converting to tf.DataSet format

In [8]:
batch_size = 512
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).shuffle(1000)  

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).shuffle(1000)  #shuffle(1000)
val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size).shuffle(1000)


In [9]:
X_train.shape, X_test.shape, X_val.shape

In [10]:
len(y_train), len(y_test), len(y_val)

In [11]:
# N == len(y_train)+len(y_test)+ len(y_val)

### ThreeLayerNN classifier

In [12]:
class ThreeLayerNN(Model):
    def __init__(self, name=None):
        
        super(ThreeLayerNN, self).__init__(name=name)
        # super(name=name)

        self.dense1 = Dense(32, activation='relu', input_shape=(V,),
                            kernel_regularizer='l2', bias_regularizer='l2',  
                            kernel_initializer='uniform',
                            name='dense_1')
        self.dropout1 = Dropout(0.25)
        
        self.dense2 = Dense(16, activation='relu', 
                            kernel_regularizer='l2', bias_regularizer='l2',
                            name='dense_2')
        self.dropout2 = Dropout(0.25)
        
        self.pred_layer = Dense(2, activation='sigmoid',
                                kernel_initializer='uniform',
                                name='predictions')  # output layer 
    def call(self, x):
        x = self.dense1(x)
        x = self.dropout1(x)
        x = self.dense2(x)
        x = self.dropout2(x)
        return self.pred_layer(x)
    
def get_model():
    return ThreeLayerNN(name='3_layer_nn')
            
model_nn3 = get_model() 

#### Choose an optimizer and loss function for training: 

In [13]:
loss_object = tf.keras.losses.BinaryCrossentropy()  
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-6) 

#### metrics to measure the loss and accuracy of the model

In [14]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')

Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarIsInitializedOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op LogicalNot in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Assert in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0


#### Train 
using eager execution with tf.GradientTape to record the gradients

In [15]:
@tf.function
def train_step(X, labels):
    with tf.GradientTape() as tape:  # Record operations for automatic differentiation
        predictions = model_nn3(X)
        print("predictions", predictions)
        labels = tf.reshape(tf.tile(labels, [2]), [-1, 2])
        loss = loss_object(labels, predictions)
        print("loss:", loss)
    gradients = tape.gradient(loss, model_nn3.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model_nn3.trainable_variables))
    
    train_loss(loss)
    train_accuracy(labels, predictions)

#### Test / Validation

In [16]:
@tf.function
def test_step(X, labels):
    predictions = model_nn3(X)
    labels = tf.reshape(tf.tile(labels, [2]), [-1, 2])
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

#### Training the model ThreeLayerMLP

In [17]:
EPOCHS = 500
early_stop_counter = 0 
delta = 0.0001  # the differenece between two consecutive validation losses (accuracies) for early stop
patience = 5 
early_stop = False

training_losses_nn3, training_accuracies_nn3 = [], []
validations_losses_nn3, validations_accuracies_nn3 = [], []
i = 0

for epoch in range(EPOCHS):
    for X_tr, y_tr in train_ds:  # X_tr := train iterator from traning data set (train_ds)
        results = train_step(X_tr, y_tr)
    training_losses_nn3.append(train_loss.result().numpy())
    training_accuracies_nn3.append(train_accuracy.result().numpy())
    
    for X_val, y_val in val_ds:  
        test_step(X_val, y_val)
    validations_losses_nn3.append(test_loss.result().numpy())
    validations_accuracies_nn3.append(test_accuracy.result().numpy())
    
    # checkpoint_path. i.e weight and layers and ect.
    # For the case of Server Failures or etc
    
#     if epoch % 10 == 0: 
#         checkpoint_path = "NN-ckecks/ThreeLayerNN6_model_" + name_of_particle + str(epoch)
#         model_nn3.save_weights(checkpoint_path, save_format='tf')
    
    training_losses_nn3.append(train_loss.result().numpy())
    training_accuracies_nn3.append(train_accuracy.result().numpy())

    validations_losses_nn3.append(test_loss.result().numpy())
    validations_accuracies_nn3.append(test_accuracy.result().numpy())
    
    template = 'Epoch {}, Train Loss: {:.3f}, Train Accuracy:{:.3f}, \
    Validation Loss: {:.3f}, Validation Accuracy:{:.3f},'
    
    print (template.format(epoch+1,
                         train_loss.result().numpy(),
                         train_accuracy.result().numpy()*100,
                           
                         test_loss.result().numpy(),
                         test_accuracy.result().numpy()*100),)
    
    if epoch >= 5:
        
        history = validations_losses_nn3[-5:]
        for i in range(len(history)):
            if i < len(history)-1:
                if np.abs(history[i+1]-history[i]) <= delta:
                    early_stop_counter += 1
        if early_stop_counter == patience-1:
            early_stop = True

        early_stop_counter = 0

        print("early_stop:", early_stop)

    # Reset metrics for the next epochs
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
    
#     if early_stop is True:
#         break
    
model_nn3.save_weights("NN-ckecks/ThreeLayerNN_model"+ name_of_particle +".h5")

Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AnonymousIteratorV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MakeIterator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op IteratorGetNextSync in device /job:localhost/replica:0/task:0/device:CPU:0


W1013 19:04:06.009195 140362191300416 base_layer.py:1814] Layer 3_layer_nn is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
predictions Tensor("3_layer_nn/predictions/Sigmoid:0", shape=(512, 2), dtype=float32)
loss: Tensor("binary_crossentropy/weighted_loss/value:0", shape

Epoch 38, Train Loss: 0.633, Train Accuracy:98.642,     Validation Loss: 0.633, Validation Accuracy:98.751,
early_stop: False
Epoch 39, Train Loss: 0.632, Train Accuracy:98.643,     Validation Loss: 0.631, Validation Accuracy:98.751,
early_stop: False
Epoch 40, Train Loss: 0.630, Train Accuracy:98.643,     Validation Loss: 0.629, Validation Accuracy:98.751,
early_stop: False
Epoch 41, Train Loss: 0.628, Train Accuracy:98.643,     Validation Loss: 0.627, Validation Accuracy:98.751,
early_stop: False
Epoch 42, Train Loss: 0.626, Train Accuracy:98.643,     Validation Loss: 0.626, Validation Accuracy:98.751,
early_stop: False
Epoch 43, Train Loss: 0.624, Train Accuracy:98.643,     Validation Loss: 0.624, Validation Accuracy:98.751,
early_stop: False
Epoch 44, Train Loss: 0.623, Train Accuracy:98.643,     Validation Loss: 0.622, Validation Accuracy:98.751,
early_stop: False
Epoch 45, Train Loss: 0.621, Train Accuracy:98.643,     Validation Loss: 0.620, Validation Accuracy:98.751,
early_stop

KeyboardInterrupt: 

### The summary of theTrained Model (ThreeLayerNN)

In [None]:
model_nn3.summary()

The results are very good however, the data set is not a tricky one!

## A More Complex Network 

### SixLayerNN classifier with more Neurons

In [None]:
class SixLayerNN(Model):
    def __init__(self, name=None):
        super(SixLayerNN, self).__init__(name=name)
        self.dense1 = Dense(256, activation='relu', kernel_regularizer='l2', 
                            bias_regularizer='l2', input_shape=(V,), name='dense_1',
                            kernel_initializer='normal'
                           )
        self.dropout1 = Dropout(0.25)
        
        self.dense2 = Dense(128, activation='relu', 
                            kernel_regularizer='l2', bias_regularizer='l2', name='dense_2'
                           )
        self.dropout2 = Dropout(0.25)
        
        self.dense3 = Dense(64, activation='relu', 
                            kernel_regularizer='l2', bias_regularizer='l2', name='dense_3'
                           )
        self.dropout3 = Dropout(0.25)
        
        self.dense4 = Dense(32, activation='relu', 
                            kernel_regularizer='l2', bias_regularizer='l2', name='dense_4'
                           )
        self.dropout4 = Dropout(0.25)
        
        self.dense5 = Dense(16, activation='relu', 
                            kernel_regularizer='l2', bias_regularizer='l2', name='dense_5'
                           )
        self.dropout5 = Dropout(0.25)
        
        self.pred_layer = Dense(2, activation='sigmoid',
                               name='predictions')  # output layer 
        
    def call(self, x):
        x = self.dense1(x)
        x = self.dropout1(x)
        x = self.dense2(x)
        x = self.dropout2(x)
        x = self.dense3(x)
        x = self.dropout3(x)
        x = self.dense4(x)
        x = self.dropout4(x)
        x = self.dense5(x)
        x = self.dropout5(x)
        return self.pred_layer(x)
    
def get_model6():
    return SixLayerNN(name='6_layer_nn')
            
model_nn6 = get_model6() 

#### Choose an optimizer and loss function for training: 

In [None]:
loss_object6 = tf.keras.losses.BinaryCrossentropy()
optimizer6 = tf.keras.optimizers.Adam(learning_rate=1e-6)

#### metrics to measure the loss and accuracy of the model
(I think it was possible to use the previously defined functions but I decided to defined them once more)

In [None]:
train_loss6 = tf.keras.metrics.Mean(name='train_loss6')
train_accuracy6 = tf.keras.metrics.BinaryAccuracy(name='train_accuracy6')

test_loss6 = tf.keras.metrics.Mean(name='test_loss6')
test_accuracy6 = tf.keras.metrics.BinaryAccuracy(name='test_accuracy6')

#### Train 
using eager execution with tf.GradientTape to record the gradients

In [None]:
@tf.function
def train_step6(X, labels):
    with tf.GradientTape() as tape:  # Record operations for automatic differentiation
        predictions6 = model_nn6(X)
        labels = tf.reshape(tf.tile(labels, [2]), [-1, 2])
        loss6 = loss_object6(labels, predictions6)
    
    gradients6 = tape.gradient(loss6, model_nn6.trainable_variables)
    optimizer6.apply_gradients(zip(gradients6, model_nn6.trainable_variables))
    
    train_loss6(loss6)
    train_accuracy6(labels, predictions6)

#### Test / Validation

In [None]:
@tf.function
def test_step6(X, labels):
    predictions6 = model_nn6(X)
    labels = tf.reshape(tf.tile(labels, [2]), [-1, 2])
    t_loss6 = loss_object6(labels, predictions6)
    
    test_loss6(t_loss6)
    test_accuracy6(labels, predictions6)
    

#### Training the model SixLayerMLP

In [None]:
EPOCHS = 500
early_stop_counter = 0 
delta = 0.0001  # the differenece between two consecutive validation losses (accuracies) for early stop
patience = 5 
early_stop = False


training_losses_nn6, training_accuracies_nn6 = [], []
validations_losses_nn6, validations_accuracies_nn6 = [], []
i = 0

for epoch in range(EPOCHS):
    for X_tr, y_tr in train_ds:  # X_tr := train iterator from traning data set (train_ds)
        results = train_step6(X_tr, y_tr)
    training_losses_nn6.append(train_loss6.result().numpy())
    training_accuracies_nn6.append(train_accuracy6.result().numpy())
    
    for X_val, y_val in val_ds:  
        test_step6(X_val, y_val)
    validations_losses_nn6.append(test_loss6.result().numpy())
    validations_accuracies_nn6.append(test_accuracy6.result().numpy())
    
    # checkpoint_path. i.e weight and layers and ect.
    # For the case of Server Failures or etc
    
#     if epoch % 10 == 0:
#         checkpoint_path = "NN-ckecks/SixLayerNN6_model_" + name_of_particle + str(epoch)
#         model_nn6.save_weights(checkpoint_path, save_format='tf')
    
    training_losses_nn6.append(train_loss6.result().numpy())
    training_accuracies_nn6.append(train_accuracy6.result().numpy())

    validations_losses_nn6.append(test_loss6.result().numpy())
    validations_accuracies_nn6.append(test_accuracy6.result().numpy())
    
    template = 'Epoch {}, Train Loss: {:.3f}, Train Accuracy:{:.3f}, \
    Validation Loss: {:.3f}, Validation Accuracy:{:.3f},'
    _nn3
    print (template.format(epoch+1,
                         train_loss6.result().numpy(),
                         train_accuracy6.result().numpy()*100,
                           
                         test_loss6.result().numpy(),
                         test_accuracy6.result().numpy()*100),)
    
    if epoch >= 5:
        
        history = validations_losses_nn6[-5:]
        for i in range(len(history)):
            if i < len(history)-1:
                if np.abs(history[i+1]-history[i]) <= delta:
                    early_stop_counter += 1
        if early_stop_counter == patience-1:
            early_stop = True

        early_stop_counter = 0


        print("early_stop:", early_stop)

    # Reset metrics for the next epochs
    train_loss6.reset_states()
    train_accuracy6.reset_states()
    test_loss6.reset_states()
    test_accuracy6.reset_states()
    
    
#     if early_stop is True:
#         break
    
model_nn6.save_weights("NN-ckecks/SixLayerNN_model"+ name_of_particle +".h5")

### The summaru of theTrained Model

In [None]:
model_nn6.summary()

### plotting the train and validation losses and accuracies

In [None]:
_ = plt.figure(figsize=[15, 10.5])

plt.subplot(221)
plt.plot(training_accuracies_nn3, 'b-.', validations_accuracies_nn3, 'g-.',
         training_accuracies_nn6, 'm-+', validations_accuracies_nn6, 'y-+',)

plt.title("Accuracy")
plt.xlabel("Number of Epochs")
plt.ylabel("classification accuracy")
plt.legend(["3NN-training", "3NN-valid", "6NN-training", "6NN-valid",])

plt.subplot(222)
plt.plot(training_losses_nn3, 'b-.', validations_losses_nn3, 'g-.',
         training_losses_nn6, 'm-+', validations_losses_nn6, 'y-+',)
plt.title("Loss")
plt.xlabel("Number of Epochs")
plt.ylabel("classification loss")
plt.legend(["3NN-training", "3NN-valid", "6NN-training", "6NN-valid",])
plt.show()

##### Why is the training loss (much) higher than the testing (Validation) loss?

A Keras model has two modes: training and testing(in above validation).
Regularization mechanisms, such as Dropout and L1/L2 weight regularization, are turned off at testing(validation) time.

Besides, the training loss is the average of the losses over each batch of training data. Because your model is changing over time, the loss over the first batches of an epoch is generally higher than over the last batches. On the other hand, the testing loss for an epoch is computed using the model as it is at the end of the epoch, resulting in a lower loss.

I guess that in above case the presence of dropout and legularization especially prevent the accuracy from going to 1.0 during training, while it achieves it during evaluation (testing). 

###### NOTE: /
If you think I am mistaken, I could plot the falsify points and check my claim. 
(It's a bit tricky but it is not impossible)

### Advantages of more complex network

At one can perceive adding more layers and Neuron is equivalent to increasing the speed of convergence which could be indeed useful in real-world applications.
Moreover, I expect that with utilizing the more complex network the better results will obtain.

### Restore the save model - NN3

In [None]:
new_model_nn3 = get_model()
new_model_nn3.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=1e-6))

# Since In this implementation instead of weight we are dealing 
# with codes and classes therefore the traditional serialization and
# deserialization is not possible. So we have to first initialze
# the model (which is code) and then load the weights 
# Ref: https://colab.research.google.com/drive/172D4jishSgE3N7AO6U2OKAA_0wNnrMOq#scrollTo=OOSGiSkHTERy

cntr = 0
for i, j in train_ds:
    if cntr == 0:
        new_model_nn3.train_on_batch(i[:1], j[:1])
    cntr += 1 

new_model_nn3.load_weights('NN-ckecks/ThreeLayerNN_model'+ name_of_particle+'.h5')
test_predictions = new_model_nn3.predict(X_test)
probabilities = tf.nn.sigmoid(test_predictions)
labels_pred_nn3 = tf.argmax(probabilities, axis=1)


labels_true_nn3 = []
for i, j in test_ds:
    for k in j.numpy():
        labels_true_nn3.append(k)

f1_score_nn3 = precision_recall_fscore_support(labels_true_nn3, labels_pred_nn3, average='weighted') # Does not take into account labels imbalanced
print("precision:", "%.2f" %f1_score_nn3[0], "recall:", "%.2f" % f1_score_nn3[1], "fscore:", "%.2f" %f1_score_nn3[2])


In [None]:
test_predictions

In [None]:
# f1_score_nn3 = precision_recall_fscore_support(labels_true_nn3, labels_pred_nn3,) # average='weighted') # Does not take into account labels imbalanced
# print("precision:", f1_score_nn3[0], "recall:", f1_score_nn3[1], "fscore:", f1_score_nn3[2])


### Restore the save model - NN6

In [None]:
new_model_nn6 = get_model6()
new_model_nn6.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=1e-6))

# Since In this implementation instead of weight we are dealing 
# with codes and classes therefore the traditional serialization and
# deserialization is not possible. So we have to first initialze
# the model (which is code) and then load the weights 
# Ref: https://colab.research.google.com/drive/172D4jishSgE3N7AO6U2OKAA_0wNnrMOq#scrollTo=OOSGiSkHTERy

cntr = 0
for i, j in train_ds:
    if cntr == 0:
        new_model_nn6.train_on_batch(i[:1], j[:1])
    cntr += 1 

new_model_nn6.load_weights('NN-ckecks/SixLayerNN_model'+ name_of_particle +'.h5')
test_predictions_ = new_model_nn6.predict(X_test)
probabilities_ = tf.nn.sigmoid(test_predictions_)
labels_pred_nn6 = tf.argmax(probabilities_, axis=1)

labels_true_nn6 = []
for i, j in test_ds:
    for k in j.numpy():
        labels_true_nn6.append(k)

f1_score_nn6 = precision_recall_fscore_support(labels_true_nn6, labels_pred_nn6, average='weighted') # Does not take into account labels imbalanced
print("precision-6NN:", "%.2f" % f1_score_nn6[0], "recall-6NN:", "%.2f" % f1_score_nn6[1], "fscore-6NN:", "%.2f" % f1_score_nn6[2])

In [None]:
# f1_score_nn6 = precision_recall_fscore_support(labels_true_nn6, labels_pred_nn6,) # average='weighted') # Does not take into account labels imbalanced
# print("precision:", f1_score_nn6[0], "recall:", f1_score_nn6[1], "fscore:", f1_score_nn6[2])


In [None]:
PPV3, TPR3, FSCORE3, FNR3, FPR3, TNR3 = perf_measure(y_true=labels_true_nn6, y_pred=labels_pred_nn3)

PPV6, TPR6, FSCORE6, FNR6, FPR6, TNR6 = perf_measure(y_true=labels_true_nn6, y_pred=labels_pred_nn6)


In [None]:
PPV3, TPR3, FSCORE3, FNR3, FPR3, TNR3

(array([0.97, 0.01]),
 array([0., 1.]),
 array([0.  , 0.03]),
 array([1., 0.]),
 array([0., 1.]),
 array([1., 0.]))

In [None]:
PPV6, TPR6, FSCORE6, FNR6, FPR6, TNR6 