### Compare models trained with attention layer.

The purpose of this notebook is to select the best model among the various models trained with a Keras attention-layer mechanism.

At the moment, the comparison is made between models trained with a custom Attention layer, not the one provided by TensorFlow 2.4.1.

To select the best model we used the following guidelines:

* 1) The model with the lowest hamming loss & zero one loss
* 2) The model with the lowest test score and the highest test accuracy values
* 3) The model with the most accurate predictions among the 17 labels. It is very important that the best model correctly identifies most of the genre tags. Models that cannot identify more than 2 genre tags will not be preferred.
* 4) Compare model predictions on movies never seen before.
* 5) Training-Validation metrics comparison.

In [None]:
# Mount Google Drive at /content/drive; the tokenizers, datasets and saved models
# read later in this notebook are all addressed through paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
"""
Install those libraries if the notebook is executed on Google Colab
"""
# unidecode and humanfriendly are imported in the import cell of this notebook.
!pip install --quiet unidecode
!pip install --quiet humanfriendly
# tensorflow_docs is installed straight from its GitHub repository.
!pip install git+https://github.com/tensorflow/docs

[K     |████████████████████████████████| 245kB 4.2MB/s 
[K     |████████████████████████████████| 92kB 3.9MB/s 
[?25hCollecting git+https://github.com/tensorflow/docs
  Cloning https://github.com/tensorflow/docs to /tmp/pip-req-build-fvmlz4hb
  Running command git clone -q https://github.com/tensorflow/docs /tmp/pip-req-build-fvmlz4hb
Collecting protobuf>=3.14
[?25l  Downloading https://files.pythonhosted.org/packages/fe/fd/247ef25f5ec5f9acecfbc98ca3c6aaf66716cf52509aca9a93583d410493/protobuf-3.14.0-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)
[K     |████████████████████████████████| 1.0MB 5.9MB/s 
Building wheels for collected packages: tensorflow-docs
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone
  Created wheel for tensorflow-docs: filename=tensorflow_docs-0.0.0430fe3be375a1f3f6267b86300e1c0f1d79b2b26_-cp36-none-any.whl size=146357 sha256=e2541c93275a011fd7de28471e3e5f448c55e3560926d8ff21b47ec722a6d464
  Stored in directory: /tmp/pip-ephem-wheel-cache-z4ar9

#### Import the libraries

In [None]:
import collections

try:
    collectionsAbc = collections.abc
except AttributeError:
    collectionsAbc = collections

import pandas as pd
import numpy as np
import os
import time
import random
import pickle
import json
import shutil
import unidecode
import glob

%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from datetime import datetime
from tabulate import tabulate
from packaging import version
from humanfriendly import format_timespan
from sklearn.metrics import confusion_matrix, classification_report, hamming_loss, zero_one_loss, f1_score, roc_auc_score

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
date_format='%Y-%m-%d %H-%M-%S'

%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as py

import pydot
import pydotplus
import graphviz

from IPython.display import SVG
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=False)
from pylab import rcParams

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_docs as tfdocs #!pip install git+https://github.com/tensorflow/docs
import tensorflow_docs.plots as tfplots
import tensorflow_docs.modeling as tfmodel

from tensorflow.keras import layers, regularizers, models
from tensorflow.keras import models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import model_to_dot, plot_model
from tensorflow.keras.models import load_model, model_from_json

import tensorflow_hub as hub
from tensorboard.plugins.hparams import api as hp

import keras.backend as K
from tensorflow import keras

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Report the runtime environment and stop early when TensorFlow is older than 2.x.
_tf_version = tf.__version__
print("TensorFlow version: ", _tf_version)

_tf_major = version.parse(_tf_version).release[0]
assert _tf_major >= 2, "This notebook requires TensorFlow 2.0 or above."

print("Version: ", _tf_version)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)

# An empty device list means no GPU is visible to TensorFlow.
if tf.config.list_physical_devices('GPU'):
    _gpu_status = "available"
else:
    _gpu_status = "NOT AVAILABLE"
print("GPU is", _gpu_status)

TensorFlow version:  2.4.0
Version:  2.4.0
Eager mode:  True
Hub version:  0.10.0
GPU is NOT AVAILABLE


#### Import the data already tokenized and transformed from Part 3.1

Set the version data control parameter (to save the outputs of this notebook at their latest date)

In [None]:
class initialize_notebook_variables:
    """Namespace of string tags used to build the Drive paths read by this notebook."""

    # --- tokenized inputs -------------------------------------------------
    # Date tag under which the word tokenizers (one per input) were saved.
    saved_word_tokenizers = "13072020"
    # Drive folder where the tokenizers and the x, y arrays are stored.
    tokenization_history_folder = "text_tokenization_padded_sequences"

    # --- trained-model variant --------------------------------------------
    # Base file name of the saved models.
    saved_model_name = "classification_attention_layer_model"
    # Implementation approach of the attention layer.
    approach_type = "approach1"
    # Batch-size tag of the model fit().
    batch_size_value = "32batch"
    # Whether label smoothing was applied.
    labelsmoothing_value = "nolabelsmoothing"
    # Learning-rate scheduler tag.
    learning_rate_scheduler = "inverse_time_decay"
    # Dropout tag.
    dropout_rate = "0.0dropout"

In [None]:
"""
1. Import the number of words tokenized per input
2. Import the class tokenizer per input
3. Import the X_train, X_validation, y_train, y_validation data for training and validation neural network during training
4. Import X_test, y_test data for evaluating the performance of the trained neural networks
5. Import the genres

Train-Test split ratio is: 80-20% and the data were not balanced. The initial frequency ratios have been kept
Train-Validation split ratio is: 80-20%
"""

def _load_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(path, 'rb') as f:
        return pickle.load(f)


# Date tag and folders shared by every path below.
_date_tag = initialize_notebook_variables.saved_word_tokenizers
_tokenized_dir = f"/content/drive/MyDrive/data/{initialize_notebook_variables.tokenization_history_folder}_{_date_tag}"
_attention_dir = "/content/drive/MyDrive/AttentionLayer"

#1. Word tokenized
words_tokenized = _load_pickle(f"{_tokenized_dir}/words_tokenized_{_date_tag}.pkl")

#2. Tokenizers
def _load_tokenizer(input_name, vocabulary_key):
    """Load the tokenizer of one input; its file name embeds the vocabulary size stored in words_tokenized."""
    return _load_pickle(f"{_tokenized_dir}/{input_name}_tokenizer_{words_tokenized[vocabulary_key]}_{_date_tag}.pkl")

actors_tokenizer = _load_tokenizer("actors", "actors_tokenized")
plot_tokenizer = _load_tokenizer("plot", "plot_words_tokenized")
features_tokenizer = _load_tokenizer("features", "features_words_tokenized")
reviews_tokenizer = _load_tokenizer("reviews", "reviews_words_tokenized")
title_tokenizer = _load_tokenizer("title", "title_words_tokenized")

# Check every tokenizer against the vocabulary size recorded in words_tokenized.
# A mismatch is only reported (the notebook keeps running, as before), but the message now
# names the offending tokenizer and the actual/expected sizes instead of a fixed "20000".
for _name, _tokenizer, _key in (("actors_tokenizer", actors_tokenizer, "actors_tokenized"),
                                ("plot_tokenizer", plot_tokenizer, "plot_words_tokenized"),
                                ("features_tokenizer", features_tokenizer, "features_words_tokenized"),
                                ("reviews_tokenizer", reviews_tokenizer, "reviews_words_tokenized"),
                                ("title_tokenizer", title_tokenizer, "title_words_tokenized")):
    _actual_size = len(_tokenizer.word_index)
    _expected_size = words_tokenized[_key]
    if _actual_size != _expected_size:
        print(f"ERROR: len({_name}.word_index) is {_actual_size}, but words_tokenized['{_key}'] is {_expected_size}.")

#3. Train, Validation data samples
validation_data_split = "8020"

def _load_stratified(array_name):
    """Load one stratified train/validation array from the AttentionLayer folder."""
    return np.load(f"{_attention_dir}/{array_name}_stratified_{validation_data_split}.npy")

X_train_seq_actors = _load_stratified("x_train_seq_actors")
X_train_seq_plot = _load_stratified("x_train_seq_plot")
X_train_seq_features = _load_stratified("x_train_seq_features")
X_train_seq_reviews = _load_stratified("x_train_seq_reviews")
X_train_seq_title = _load_stratified("x_train_seq_title")

X_validation_seq_actors = _load_stratified("x_validation_seq_actors")
X_validation_seq_plot = _load_stratified("x_validation_seq_plot")
X_validation_seq_features = _load_stratified("x_validation_seq_features")
X_validation_seq_reviews = _load_stratified("x_validation_seq_reviews")
X_validation_seq_title = _load_stratified("x_validation_seq_title")

y_train = _load_stratified("y_train")
y_validation = _load_stratified("y_valid")  # the label file is named "y_valid_...", not "y_validation_..."

#4. Test data samples
def _load_test_sequences(input_name, vocabulary_key):
    """Load the padded test sequences of one input; the file name embeds its vocabulary size."""
    return np.load(f"{_tokenized_dir}/x_test_seq_{input_name}_80-20_non-balanced_{words_tokenized[vocabulary_key]}_{_date_tag}.npy")

X_test_seq_actors = _load_test_sequences("actors", "actors_tokenized")
X_test_seq_plot = _load_test_sequences("plot", "plot_words_tokenized")
X_test_seq_features = _load_test_sequences("features", "features_words_tokenized")
X_test_seq_reviews = _load_test_sequences("reviews", "reviews_words_tokenized")
X_test_seq_title = _load_test_sequences("title", "title_words_tokenized")

y_test = np.load(f"{_tokenized_dir}/y_test_80-20_non-balanced_{_date_tag}.npy")

#5. Genre tags (target label)
genres_list = _load_pickle(f"{_tokenized_dir}/genres_list_06032020.pkl")

In [None]:
# Folder holding the models trained with the variant selected in initialize_notebook_variables.
folder_path_model_saved=f"/content/drive/MyDrive/AttentionLayer/attention_layer_{initialize_notebook_variables.approach_type}_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}_{initialize_notebook_variables.dropout_rate}"
#folder_path_model_saved=f"/content/drive/MyDrive/AttentionLayer/attention_layer_{initialize_notebook_variables.approach_type}_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}"

print(folder_path_model_saved)

# glob returns matches in arbitrary order; sort them so that the [-1] pick is reproducible.
model_path = sorted(glob.glob(folder_path_model_saved + '/*.h5'))
if not model_path:
    raise FileNotFoundError(f"No .h5 model file found in {folder_path_model_saved}")

# The date tag is the last "_"-separated token of the file name without its extension.
# splitext is used instead of rstrip(".h5"): rstrip removes any trailing '.', 'h' or '5'
# characters, so a date tag ending in 5 would lose digits.
saved_model_date = os.path.splitext(os.path.basename(model_path[-1]))[0].split("_")[-1]
#saved_df_scored_metric_name="df_metrics_multi_input_keras_model"

/content/drive/MyDrive/AttentionLayer/attention_layer_approach1_32batch_nolabelsmoothing_inverse_time_decay_0.0dropout


In [None]:
# NOTE(review): this has the form initial_lr / (1 + decay_rate * step / decay_steps), i.e. it looks
# like a spot check of the inverse-time-decay schedule with initial_lr=0.001, decay_rate=0.1 and
# step=1 — confirm what the 9791 stands for.
0.001 / (1 + 0.1 * 1 / 9791)

0.0009999897866429719

In [None]:
# Number of chunks of 128 training samples, rounded up (presumably batches per epoch — confirm).
# True division is required here: with `//` the quotient is already floored and np.ceil has no effect.
np.ceil(X_train_seq_actors.shape[0] / 128)

244.0

Initialise some predefined values first:

* Set Optimization function
* Model loss
* Model metric

In [None]:
# Optimizer settings are filled in next to each optimizer factory further down in this cell.
optimizer_parameters = {}

#----------------------------------------------------------------------

# Model Compilation
_num_labels = y_train.shape[-1]  # size of the last axis of the training labels

neural_network_parameters = {
    # Binary cross-entropy with from_logits=False and no label smoothing.
    'model_loss': tf.keras.losses.BinaryCrossentropy(from_logits=False,
                                                     label_smoothing=0.0,
                                                     name='binary_crossentropy'),
    # Hamming loss plus micro / per-label / macro F1 scores.
    'model_metric': [
        tfa.metrics.HammingLoss(mode="multilabel", name="hamming_loss"),
        tfa.metrics.F1Score(_num_labels, average="micro", name="f1_score_micro"),
        tfa.metrics.F1Score(_num_labels, average=None, name="f1_score_none"),
        tfa.metrics.F1Score(_num_labels, average="macro", name="f1_score_macro"),
    ],
}

#----------------------------------------------------------------------
# Function 1 - Optimizers

# Optimizer: ADAM (Learning scheduler with Inverse Time Decay)

optimizer_parameters['lr_scheduler_decay_rate'] = 0.1
optimizer_parameters['staircase'] = False

def optimizer_adam_v2(hparams):
    """Build an Adam optimizer whose learning rate follows an inverse-time-decay schedule.

    Args:
        hparams: mapping indexed by HP_LEARNING_RATE (initial learning rate),
            HP_HIDDEN_UNITS (used as the batch size) and HP_DECAY_STEPS_MULTIPLIER
            (number of epochs' worth of steps in one decay period).

    Returns:
        keras.optimizers.Adam driven by keras.optimizers.schedules.InverseTimeDecay.
    """
    initial_learning_rate = hparams[HP_LEARNING_RATE]

    # Batches per epoch, rounded up. True division is required: the previous
    # np.ceil(n // batch) floored the quotient first, so the ceil never took effect.
    steps_per_epoch = int(np.ceil(X_train_seq_actors.shape[0] / hparams[HP_HIDDEN_UNITS]))
    decay_steps = steps_per_epoch * hparams[HP_DECAY_STEPS_MULTIPLIER]

    # Decay rate and staircase flag come from optimizer_parameters (set just above)
    # instead of being repeated here as literals.
    learning_rate_fn = keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate,
        decay_steps,
        optimizer_parameters['lr_scheduler_decay_rate'],
        staircase=optimizer_parameters['staircase'])

    return keras.optimizers.Adam(learning_rate=learning_rate_fn)
#---------------------------------------------------------------------------------------

# Optimizer: SGD (version 1)

optimizer_parameters['SGD_momentum'] = 0.2 #default 0.0
optimizer_parameters['SGD_nesterov'] = True #default False

def optimizer_sgd_v1(haparms, mode):
    """Build an SGD optimizer.

    Args:
        haparms: mapping indexed by HP_LEARNING_RATE. The misspelled parameter name is
            kept so that existing calls keep working.
        mode: "step decay" returns an SGD with learning rate 0.0 and momentum 0.9;
            any other value returns an SGD configured from hparams and optimizer_parameters.

    Returns:
        keras.optimizers.SGD
    """
    # The body used to read a global named `hparams` and ignored its own argument.
    hparams = haparms

    if mode == "step decay":
        # The learning rate is set to 0.0 to make clear that it is not taken from here
        # (presumably an external step-decay schedule supplies it — confirm in the training notebook).
        # A large momentum helps the updates keep their direction once the learning rate becomes small.
        return keras.optimizers.SGD(learning_rate=0.0, momentum=0.9)

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    return keras.optimizers.SGD(learning_rate=hparams[HP_LEARNING_RATE],
                                momentum=optimizer_parameters['SGD_momentum'],
                                nesterov=optimizer_parameters['SGD_nesterov'])

#---------------------------------------------------------------------------------------

# Optimizer: RMSprop (version 1)

optimizer_parameters['RMSprop_momentum'] = 0.5
optimizer_parameters['RMSprop_centered'] = True

def optimizer_rmsprop_v1(haparms):
    """Build an RMSprop optimizer from the hyper-parameters and optimizer_parameters.

    Args:
        haparms: mapping indexed by HP_LEARNING_RATE. The misspelled parameter name is
            kept so that existing calls keep working.

    Returns:
        keras.optimizers.RMSprop
    """
    # The body used to read a global named `hparams` and ignored its own argument.
    hparams = haparms

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    return keras.optimizers.RMSprop(learning_rate=hparams[HP_LEARNING_RATE],
                                    momentum=optimizer_parameters['RMSprop_momentum'],
                                    centered=optimizer_parameters['RMSprop_centered'])

#----------------------------------------------------------------------

# Function 2
def hamming_loss(y_true, y_pred, mode='multilabel'):
    """Per-sample Hamming loss computed with TensorFlow ops.

    Note: this definition rebinds the name `hamming_loss` imported from sklearn.metrics
    in the import cell.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels (already thresholded to 0/1 by the caller in this notebook).
        mode: 'multilabel' -> count of non-zero entries of (y_true - y_pred) along the last
            axis, divided by the size of that axis;
            'multiclass' -> 1.0 minus the count of non-zero entries of (y_true * y_pred)
            along the last axis.

    Returns:
        A float32 tensor with one value per sample.

    Raises:
        TypeError: if mode is neither 'multiclass' nor 'multilabel'.
    """
    if mode not in ['multiclass', 'multilabel']:
        raise TypeError('mode must be one of: [multiclass, multilabel]')

    if mode == 'multiclass':
        nonzero = tf.cast(tf.math.count_nonzero(y_true * y_pred, axis=-1), tf.float32)
        return 1.0 - nonzero

    nonzero = tf.cast(tf.math.count_nonzero(y_true - y_pred, axis=-1), tf.float32)
    return nonzero / y_true.shape[-1]

# Wrap the hamming_loss function so it can be used as a Keras metric object
class HammingLoss(tfa.metrics.MeanMetricWrapper):
    """Mean-metric wrapper around `hamming_loss`; `mode` is forwarded to the wrapped function."""

    def __init__(self, name='hamming_loss_approach2', dtype=None, mode='multilabel'):
        super().__init__(hamming_loss, name, dtype=dtype, mode=mode)

In [None]:
# Function 1: Import the model structure and weights
def _last_sorted_match(pattern):
    """Return the last path, in sorted order, that matches the glob ``pattern``.

    Raises:
        FileNotFoundError: if no file matches.
    """
    # glob order is arbitrary; sorting makes the pick reproducible and applies the
    # same rule to the .json and the .h5 lookup.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No saved model file matches: {pattern}")
    return matches[-1]


def _load_architecture_and_weights(json_path, weights_path, custom_objects=None):
    """Rebuild a model from its JSON architecture file and load the weights stored at ``weights_path``."""
    with open(json_path, 'r') as f:
        model_json = json.load(f)

    model = model_from_json(model_json, custom_objects=custom_objects)
    model.load_weights(weights_path)
    return model


def import_trained_keras_model(model_index, method, decay_steps_mode, optimizer_name, hparams):
    """
    Load the weights of the model saved with EarlyStopping and compile the model.

    Args:
        model_index: label printed in the success message of custom-trained models.
        method: "import custom trained model" loads an attention model of this notebook;
            any other value loads a model that contains a hub.KerasLayer.
        decay_steps_mode: "on" -> the file name carries the decay multiplier and the date
            suffix is resolved with glob; otherwise the name ends with version_data_control.
        optimizer_name: "adam", "sgd", or anything else for RMSprop (custom method only).
        hparams: mapping indexed by the HP_* hyper-parameter keys.

    Returns:
        The compiled Keras model.

    Raises:
        FileNotFoundError: in the glob-based branch when no matching file exists.
    """
    saved_model_name = initialize_notebook_variables.saved_model_name
    # The layer class itself (not an instance) is registered for deserialization.
    attention_objects = {'attention': attention}

    if method == "import custom trained model":

        if decay_steps_mode=="on":
            model_path_structure=f"{folder_path_model_saved}/{saved_model_name}_{hparams[HP_EMBEDDING_DIM]}dim_{hparams[HP_HIDDEN_UNITS]}batchsize_{hparams[HP_LEARNING_RATE]}lr_{hparams[HP_DECAY_STEPS_MULTIPLIER]}decaymultiplier"
            model_structure = _last_sorted_match(model_path_structure + '*.json')
            model_weights = _last_sorted_match(model_path_structure + '*.h5')
            print(model_path_structure)
            print(model_weights)
            print(model_structure)

        else:
            # NOTE(review): version_data_control is not defined in the part of the notebook
            # visible here — confirm it exists before using this branch.
            model_path_structure = os.path.join(os.getcwd(), f"{folder_path_model_saved}/{saved_model_name}_{hparams[HP_EMBEDDING_DIM]}dim_{hparams[HP_HIDDEN_UNITS]}batchsize_{hparams[HP_LEARNING_RATE]}lr_{version_data_control}")
            model_structure = model_path_structure + ".json"
            model_weights = model_path_structure + ".h5"

        model_imported = _load_architecture_and_weights(model_structure, model_weights, custom_objects=attention_objects)

        if optimizer_name=="adam":
            optimizer = optimizer_adam_v2(hparams)
        elif optimizer_name=="sgd":
            optimizer = optimizer_sgd_v1(hparams, "step decay")
        else:
            optimizer = optimizer_rmsprop_v1(hparams)

        success_message = f"Model {model_index} is loaded successfully\n"

    else:
        # NOTE(review): version_data_control is not defined in the part of the notebook
        # visible here — confirm it exists before using this branch.
        model_path_structure = os.path.join(os.getcwd(), f"{folder_path_model_saved}/{saved_model_name}_{hparams[HP_EMBEDDING_DIM]}dim_{hparams[HP_HIDDEN_UNITS]}batchsize_{hparams[HP_LEARNING_RATE]}lr_{hparams[HP_DECAY_STEPS_MULTIPLIER]}decaymultiplier_{version_data_control}")

        model_imported = _load_architecture_and_weights(model_path_structure + ".json",
                                                        model_path_structure + ".h5",
                                                        custom_objects={'KerasLayer':hub.KerasLayer})

        # optimizer_adam_v2 takes the hparams mapping only; the former five-argument call raised a TypeError.
        optimizer = optimizer_adam_v2(hparams)
        success_message = "Model is loaded successfully\n"

    model_imported.compile(optimizer=optimizer,
                           loss=neural_network_parameters['model_loss'],
                           metrics=neural_network_parameters['model_metric'])
    print(success_message)

    return model_imported

#----------------------------------------------------------------------

# Function 2: Create the dataframe with the computed metrics.
def _score_test_predictions(model, inputs, y_true, batch_size, include_zero_one_ratio=False):
    """Evaluate ``model`` on the given test data and return the metric columns of the scoring table.

    Args:
        model: compiled Keras model.
        inputs: list of input arrays passed to model.evaluate / model.predict.
        y_true: ground-truth label matrix.
        batch_size: batch size used by model.evaluate.
        include_zero_one_ratio: also add the normalized zero-one loss column.

    Returns:
        dict mapping column name -> one-element float pd.Series, in table order.
    """
    model_evaluation = model.evaluate(inputs, y_true, batch_size=batch_size, verbose=2)

    y_test_pred_probs = model.predict(inputs)
    y_test_predictions = (y_test_pred_probs>0.5).astype(int)

    variance = np.var(y_test_predictions)
    sse = np.mean((np.mean(y_test_predictions) - y_true)**2)
    bias = sse - variance

    hamming_loss_value = HammingLoss(mode='multilabel')
    hamming_loss_value.update_state(y_true, y_test_predictions)

    def float_column(value):
        return pd.Series([value], dtype='float')

    columns = {
        'Test Loss': float_column(model_evaluation[0]),
        'Test Hamming Loss': float_column(model_evaluation[1]),
        'Hamming Loss': float_column(hamming_loss_value.result().numpy()),
        'Zero_one Loss': float_column(zero_one_loss(y_true, y_test_predictions, normalize=False)),
    }
    if include_zero_one_ratio:
        columns['Zero_one Loss (perce)'] = float_column(zero_one_loss(y_true, y_test_predictions, normalize=True))

    columns['F1_score'] = float_column(f1_score(y_true, y_test_predictions, average="micro"))
    columns['F1_score_samples'] = float_column(f1_score(y_true, y_test_predictions, average="samples"))
    # ROC AUC is a ranking metric: it must be fed the predicted probabilities.
    # Feeding the thresholded 0/1 predictions collapses every ROC curve to a single point.
    columns['ROC_score'] = float_column(roc_auc_score(y_true, y_test_pred_probs, average="micro", multi_class="ovr"))
    columns['ROC_score_samples'] = float_column(roc_auc_score(y_true, y_test_pred_probs, average="samples", multi_class="ovr"))
    columns['Bias'] = float_column(bias)
    columns['Variance'] = float_column(variance)
    return columns


def create_df_scoring_table(method, decay_steps_mode, model_tag, hparams, model):
    """
    Create a one-row scoring dataframe used to select the best model.

    Args:
        method: "import custom trained model" scores on the X_test_seq_* / y_test arrays;
            any other value scores on the test_bytes_list_* / test_label data.
        decay_steps_mode: "on" adds the decay-multiplier tag and the normalized zero-one
            loss and writes no pickle; otherwise the table is also pickled (custom method).
        model_tag: value of the 'Tag Name' column.
        hparams: mapping indexed by the HP_* hyper-parameter keys.
        model: compiled Keras model to score.

    Returns:
        pd.DataFrame with the tag columns followed by the metric columns.
    """
    tag_columns = {'Tag Name': pd.Series(model_tag, dtype='str')}

    if method == "import custom trained model":
        with_decay = decay_steps_mode=="on"

        tag_columns['Embedding Dimension tag'] = pd.Series(hparams[HP_EMBEDDING_DIM], dtype='int')
        tag_columns['Batch tag'] = pd.Series(hparams[HP_HIDDEN_UNITS], dtype='int')
        tag_columns['Learning Rate tag'] = pd.Series(hparams[HP_LEARNING_RATE], dtype='float')
        if with_decay:
            tag_columns['Decay Multiplier tag'] = pd.Series(hparams[HP_DECAY_STEPS_MULTIPLIER], dtype='int')

        metric_columns = _score_test_predictions(
            model,
            [X_test_seq_actors, X_test_seq_plot, X_test_seq_features, X_test_seq_reviews, X_test_seq_title],
            y_test,
            hparams[HP_HIDDEN_UNITS],
            include_zero_one_ratio=with_decay)

        df_scores = pd.DataFrame({**tag_columns, **metric_columns})

        if not with_decay:
            # NOTE(review): saved_df_scored_metric_name and version_data_control are not defined
            # in the part of the notebook visible here — confirm they exist before using this branch.
            df_scores.to_pickle(os.path.join(os.getcwd(), f"{folder_path_model_saved}/{saved_df_scored_metric_name}_{hparams[HP_EMBEDDING_DIM]}dim_{hparams[HP_HIDDEN_UNITS]}batchsize_{hparams[HP_LEARNING_RATE]}lr_{version_data_control}.pkl"))

    else:
        tag_columns['Batch tag'] = pd.Series(hparams[HP_HIDDEN_UNITS], dtype='int')
        tag_columns['Learning Rate tag'] = pd.Series(hparams[HP_LEARNING_RATE], dtype='float')
        tag_columns['Decay Multiplier tag'] = pd.Series(hparams[HP_DECAY_STEPS_MULTIPLIER], dtype='int')

        # NOTE(review): test_bytes_list_*, test_label, saved_df_scored_metric_name and
        # version_data_control are not defined in the part of the notebook visible here.
        metric_columns = _score_test_predictions(
            model,
            [test_bytes_list_plot, test_bytes_list_features, test_bytes_list_reviews, test_bytes_list_title],
            test_label,
            hparams[HP_HIDDEN_UNITS])

        df_scores = pd.DataFrame({**tag_columns, **metric_columns})

        df_scores.to_pickle(os.path.join(os.getcwd(), f"{folder_path_model_saved}/{saved_df_scored_metric_name}_{hparams[HP_EMBEDDING_DIM]}dim_{hparams[HP_HIDDEN_UNITS]}batchsize_{hparams[HP_LEARNING_RATE]}lr_{hparams[HP_DECAY_STEPS_MULTIPLIER]}decaymultiplier_{version_data_control}.pkl"))

    return df_scores

**Load the models per Optimizer & Create a scoring dataframe for each model**

Step 1 of the selection plan (based on the written thesis documentation)

In [None]:
# Attention layer mechanism (Approach 1 referenced here: https://stackoverflow.com/a/62949137/10623444)
class attention(tf.keras.layers.Layer):
    """Attention layer that re-weights its input along axis 1.

    A score ``tanh(x . W + b)`` is computed, normalised with a softmax over
    axis 1 and multiplied back onto the input.
    Assumes the input is (batch, timesteps, features) -- TODO confirm against the model definition.

    Args:
        return_sequences: if True, ``call`` returns the re-weighted input as is;
            if False, the re-weighted input is summed over axis 1.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, return_sequences=True, **kwargs):
        self.return_sequences = return_sequences
        super().__init__(**kwargs)

    def get_config(self):
        """Return the base layer config extended with ``return_sequences``."""
        return {**super().get_config(), 'return_sequences': self.return_sequences}

    def build(self, input_shape):
        """Create the score weight (last input dim x 1) and the bias (input dim 1 x 1)."""
        last_dim, second_dim = input_shape[-1], input_shape[1]
        self.W = self.add_weight(name="att_weight", shape=(last_dim, 1), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(second_dim, 1), initializer="zeros")
        super().build(input_shape)

    def call(self, x):
        """Apply the attention weights to ``x``; optionally collapse axis 1 by summation."""
        scores = K.tanh(K.dot(x, self.W) + self.b)
        # Attention weights: the softmax makes them sum to 1 along axis 1.
        weights = K.softmax(scores, axis=1)
        weighted = x * weights
        return weighted if self.return_sequences else K.sum(weighted, axis=1)

In [None]:
# Attention layer mechanism (Approach 2 referenced here: https://towardsdatascience.com/create-your-own-custom-attention-layer-understand-all-flavours-2201b5e8be9e)
from keras.layers import Flatten, Activation, RepeatVector, Permute, Multiply, Lambda

class peel_the_layer(tf.keras.layers.Layer):
    """Single-unit attention layer that returns the attention weights and the weighted sum.

    Assumes the input is (batch, words_per_plot, embedding_dim) -- TODO confirm against the model definition.

    ``call`` returns a tuple ``(a, output)``:
        a: softmax-normalised attention weights, one per position of axis 1.
        output: the input multiplied by those weights and summed over axis 1.
    """

    def __init__(self, **kwargs):
        # No layer-specific arguments; everything is forwarded to the base layer.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the weight (last input dim x 1) and the bias (second-to-last input dim x 1)."""
        num_units = 1  # this is a 1-unit layer
        self.num_dim_perword = input_shape[-1]
        self.words_perplot = input_shape[-2]

        self.w = self.add_weight(name="att_weight", shape=(self.num_dim_perword, num_units), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(self.words_perplot, num_units), initializer="zeros")
        super().build(input_shape)

    def call(self, x):
        """Compute the attention weights and the attention-adjusted output.

        Uses Keras backend ops only. The previous implementation created new
        Flatten/Activation/RepeatVector/Permute/Multiply/Lambda layer objects on
        every call; the arithmetic below is the same without those throwaway layers.
        """
        # The last axis of the score has size 1 because self.w has a single unit.
        e = K.tanh(K.dot(x, self.w) + self.b)
        # Drop that size-1 axis and normalise over the remaining (last) axis.
        a = K.softmax(K.squeeze(e, axis=-1))

        # Broadcast the weights over the last axis of x, then sum over axis 1.
        output = K.sum(x * K.expand_dims(a, axis=-1), axis=1)

        # 'a' is returned untouched so callers can inspect the attention weights.
        return a, output

In [None]:
# Attention layer mechanism (Approach 3 referenced here: https://medium.com/analytics-vidhya/attention-mechanism-a-quick-intuition-26e154cdb49a)
class Attention(tf.keras.layers.Layer):
    """Attention layer that collapses axis 1 of its input into a context vector.

    Assumes the input is (batch, timesteps, features) -- TODO confirm against the model definition.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """
        Create the weight and bias used to score the input before building the context vector.
        """
        last_dim = input_shape[-1]
        second_dim = input_shape[1]
        self.W = self.add_weight(name="att_weight", shape=(last_dim, 1), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(second_dim, 1), initializer="zeros")
        super().build(input_shape)

    def call(self, x):
        """
        Score the input, turn the scores into attention probabilities with a softmax,
        and return the input weighted by them and summed over axis 1.
        """
        scores = K.tanh(K.dot(x, self.W) + self.b)
        probabilities = K.softmax(K.squeeze(scores, axis=-1))
        weighted = x * K.expand_dims(probabilities, axis=-1)
        return K.sum(weighted, axis=1)

    def compute_output_shape(self, input_shape):
        """
        Output shape: first and last dimension of the input (for Keras shape checking).
        """
        first_dim, last_dim = input_shape[0], input_shape[-1]
        return (first_dim, last_dim)

    def get_config(self):
        """
        Return the base layer configuration unchanged; this layer adds no constructor arguments.
        """
        return super().get_config()

In [None]:
# Selects which hyper-parameter grid the model-loading cell iterates over:
# "adam" uses the grid that also sweeps the decay-steps multiplier, any other value uses the grid without it.
model_method_creation="adam"
# If your RAM is 16GB: first run the 36 models trained with batch sizes 32 and 64, then the other 18 models trained with batch size 128.

In [None]:
# Load and score every trained model of the configured run (original header: 32-batch, approach 1).
# The "adam" grid below holds 1 batch size x 3 embedding dims x 2 learning rates x 2 decay multipliers = 12 models.

list_models=[]
list_df=[]

if model_method_creation=="adam":

    # NOTE: despite its name, HP_HIDDEN_UNITS carries the batch size. The identifier is kept
    # unchanged because it is the key under which the batch size is stored in `hparams`.
    HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([int(initialize_notebook_variables.batch_size_value.replace("batch",""))]))
    HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50,100,150]))
    HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001,0.01]))
    HP_DECAY_STEPS_MULTIPLIER = hp.HParam('decay_steps_multiplier', hp.Discrete([10,20]))

    # Size of the grid, computed once instead of on every iteration.
    total_models = (len(HP_HIDDEN_UNITS.domain.values)
                    * len(HP_EMBEDDING_DIM.domain.values)
                    * len(HP_LEARNING_RATE.domain.values)
                    * len(HP_DECAY_STEPS_MULTIPLIER.domain.values))

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                for decay_steps_multiplier in HP_DECAY_STEPS_MULTIPLIER.domain.values:
                    hparams = {
                        HP_HIDDEN_UNITS: batch_size,
                        HP_EMBEDDING_DIM: embedding_dim,
                        HP_LEARNING_RATE: learning_rate,
                        HP_DECAY_STEPS_MULTIPLIER: decay_steps_multiplier
                      }
                    # 1-based running number of the model about to be loaded.
                    model_number = len(list_models)+1
                    print(f"\n{model_number}/{total_models}")
                    print({h.name: hparams[h] for h in hparams},'\n')
                    model_object=import_trained_keras_model(model_number, "import custom trained model", "on", model_method_creation, hparams)
                    df_object=create_df_scoring_table("import custom trained model", "on", "{0}-{1}".format(initialize_notebook_variables.saved_model_name, model_number), hparams, model_object)
                    list_models.append(model_object)
                    list_df.append(df_object)

else:
    HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32, 64]))
    HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50, 100, 150]))
    HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01, 0.1]))

    total_models = (len(HP_HIDDEN_UNITS.domain.values)
                    * len(HP_EMBEDDING_DIM.domain.values)
                    * len(HP_LEARNING_RATE.domain.values))

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                hparams = {
                    HP_HIDDEN_UNITS: batch_size,
                    HP_EMBEDDING_DIM: embedding_dim,
                    HP_LEARNING_RATE: learning_rate
                  }
                model_number = len(list_models)+1
                print("\n{0}/{1}".format(model_number, total_models))
                # Fix: pass the running model number as first argument, as the "adam" branch does;
                # the previous call here had one argument fewer than that known-working call.
                # NOTE(review): the signature of import_trained_keras_model is not visible in this cell -- confirm.
                model_object=import_trained_keras_model(model_number, "import custom trained model", "off", model_method_creation, hparams)
                # Fix: read the model name from the settings class instead of an undefined-looking bare global.
                df_object=create_df_scoring_table("import custom trained model", "off", "{0}-{1}".format(initialize_notebook_variables.saved_model_name, model_number), hparams, model_object)
                list_models.append(model_object)
                list_df.append(df_object)


1/12
{'batch_size': 32, 'embedding_dim': 50, 'learning_rate': 0.001, 'decay_steps_multiplier': 10} 

/content/drive/MyDrive/AttentionLayer/attention_layer_approach1_32batch_nolabelsmoothing_inverse_time_decay_0.0dropout/classification_attention_layer_model_50dim_32batchsize_0.001lr_10decaymultiplier
/content/drive/MyDrive/AttentionLayer/attention_layer_approach1_32batch_nolabelsmoothing_inverse_time_decay_0.0dropout/classification_attention_layer_model_50dim_32batchsize_0.001lr_10decaymultiplier_20210102.h5
/content/drive/MyDrive/AttentionLayer/attention_layer_approach1_32batch_nolabelsmoothing_inverse_time_decay_0.0dropout/classification_attention_layer_model_50dim_32batchsize_0.001lr_10decaymultiplier_20210102.json
Model 1 is loaded successfully

307/307 - 24s - loss: 0.0271 - hamming_loss: 0.0489 - f1_score_micro: 0.7055 - f1_score_none: 0.4762 - f1_score_macro: 0.4762

2/12
{'batch_size': 32, 'embedding_dim': 50, 'learning_rate': 0.001, 'decay_steps_multiplier': 20} 

/content/dri

In [None]:
# 32 batch: inverse time decay, 0.0 dropout.
# NOTE: the result variable keeps its "64batch" name although the header (and the recorded output) refer to the 32-batch run.
# The fixed-size unpacking expects exactly twelve loaded models / score tables.
(model_one, model_two, model_three, model_four, model_five, model_six,
 model_seven, model_eight, model_nine, model_ten, model_eleven, model_twelve) = list_models
(df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six,
 df_scores_seven, df_scores_eight, df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve) = list_df

# Stack the per-model score tables, renumber the rows, then order them by Hamming loss (ties: zero-one loss).
frames_adam_64batch_inverse_decay = (
    pd.concat(list_df)
    .reset_index(drop=True)
    .sort_values(by=['Hamming Loss', 'Zero_one Loss'])
)
# The index was reset before sorting, so after this shift it is each table's 1-based position in list_df.
frames_adam_64batch_inverse_decay.index = frames_adam_64batch_inverse_decay.index + 1

frames_adam_64batch_inverse_decay.to_pickle(
    "/content/drive/MyDrive/AttentionLayer/"
    f"attention_layer_{initialize_notebook_variables.approach_type}_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}_{initialize_notebook_variables.dropout_rate}/"
    f"result_adam_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}.pkl"
)
frames_adam_64batch_inverse_decay # Note: The best model should match the best performed model from tensorboard visualizations.

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
10,classification_attention_layer_model-10,150,32,0.001,20,0.02095,0.048957,0.004256,660.0,0.067354,0.980309,0.981292,0.992808,0.994352,-0.001497,0.097145
9,classification_attention_layer_model-9,150,32,0.001,10,0.020753,0.048983,0.004412,692.0,0.070619,0.979573,0.980408,0.99208,0.993824,-0.001376,0.097023
6,classification_attention_layer_model-6,100,32,0.001,20,0.021906,0.048943,0.004442,696.0,0.071028,0.97943,0.980479,0.991915,0.993927,-0.001343,0.09699
5,classification_attention_layer_model-5,100,32,0.001,10,0.027083,0.048985,0.0047,731.0,0.074599,0.978182,0.979772,0.990167,0.992954,-0.000936,0.096581
7,classification_attention_layer_model-7,100,32,0.01,10,0.03712,0.048971,0.005583,860.0,0.087764,0.974154,0.976532,0.989032,0.992172,-0.001381,0.097027
1,classification_attention_layer_model-1,50,32,0.001,10,0.02709,0.048871,0.005595,866.0,0.088376,0.973914,0.974973,0.985869,0.98904,-0.000189,0.095832
2,classification_attention_layer_model-2,50,32,0.001,20,0.028972,0.048883,0.005835,906.0,0.092458,0.972626,0.973334,0.982529,0.986415,0.000851,0.094793
11,classification_attention_layer_model-11,150,32,0.01,10,0.046846,0.049027,0.006375,956.0,0.097561,0.970264,0.971777,0.983657,0.98761,-0.000123,0.095766
4,classification_attention_layer_model-4,50,32,0.01,20,0.040771,0.049049,0.006471,1001.0,0.102153,0.969601,0.970006,0.980299,0.984487,0.00107,0.094576
12,classification_attention_layer_model-12,150,32,0.01,20,0.050094,0.049072,0.006945,1037.0,0.105827,0.967563,0.970136,0.981611,0.986644,9e-05,0.095554


In [None]:
# Read the table results of the second round trained models
class initialize_notebook_variables:
    """Settings that select which training run and which saved artefacts the notebook cells refer to."""

    # --- training-run selection ---
    saved_model_name = "classification_attention_layer_model"
    approach_type = "approach3"  # approach implementation of the Attention layer
    batch_size_value = "64batch"  # the batch size version of the model fit()
    labelsmoothing_value = "nolabelsmoothing"  # whether label smoothing is applied
    dropout_rate = "0.0dropout"
    # Alternative scheduler tags (swap in by hand when needed):
    #   "piecewise_constant_decay"
    #   "piecewise_constant_decay_shorter_boundary"
    #   "piecewise_constant_decay_even_shorter_boundary"
    learning_rate_scheduler = "inverse_time_decay"

    # --- tokenizer artefacts ---
    saved_word_tokenizers = "13072020"  # the date the word tokenizers for each of the five inputs were saved
    tokenization_history_folder = "text_tokenization_padded_sequences"  # the Drive folder where tokenizers & x,y are saved

In [None]:
#table 1: 64 batch, inverse time decay
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_1_file = "result_adam_64batch_nolabelsmoothing_inverse_time_decay.pkl"
table_1 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_1_file}")
print(table_1_file)

result_adam_64batch_nolabelsmoothing_inverse_time_decay.pkl


In [None]:
#table 2: 64 batch, piecewise constant decay with even shorter boundary
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_2_file = "result_adam_64batch_nolabelsmoothing_piecewise_constant_decay_even_shorter_boundary.pkl"
table_2 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_2_file}")
print(table_2_file)

result_adam_64batch_nolabelsmoothing_piecewise_constant_decay_even_shorter_boundary.pkl


In [None]:
#table 3: 64 batch, piecewise constant decay with shorter boundary
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_3_file = "result_adam_64batch_nolabelsmoothing_piecewise_constant_decay_shorter_boundary.pkl"
table_3 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_3_file}")
print(table_3_file)

result_adam_64batch_nolabelsmoothing_piecewise_constant_decay_shorter_boundary.pkl


In [None]:
#table 4: 64 batch, piecewise constant decay
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_4_file = "result_adam_64batch_nolabelsmoothing_piecewise_constant_decay.pkl"
table_4 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_4_file}")
print(table_4_file)

result_adam_64batch_nolabelsmoothing_piecewise_constant_decay.pkl


In [None]:
#table 5: 128 batch, inverse time decay
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_5_file = "result_adam_128batch_nolabelsmoothing_inverse_time_decay.pkl"
table_5 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_5_file}")
print(table_5_file)

result_adam_128batch_nolabelsmoothing_inverse_time_decay.pkl


In [None]:
#table 6: 128 batch, piecewise constant decay with even shorter boundary
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_6_file = "result_adam_128batch_nolabelsmoothing_piecewise_constant_decay_even_shorter_boundary.pkl"
table_6 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_6_file}")
print(table_6_file)

result_adam_128batch_nolabelsmoothing_piecewise_constant_decay_even_shorter_boundary.pkl


In [None]:
#table 7: 128 batch, piecewise constant decay
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_7_file = "result_adam_128batch_nolabelsmoothing_piecewise_constant_decay.pkl"
table_7 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_7_file}")
print(table_7_file)

result_adam_128batch_nolabelsmoothing_piecewise_constant_decay.pkl


In [None]:
#table 8: approach 3, 32 batch
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_8_file = "result_adam_approach3_32batch_nolabelsmoothing.pkl"
table_8 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_8_file}")
print(table_8_file)

result_adam_approach3_32batch_nolabelsmoothing.pkl


In [None]:
#table 9: approach 3, 64 batch
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_9_file = "result_adam_approach3_64batch_nolabelsmoothing.pkl"
table_9 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_9_file}")
print(table_9_file)

result_adam_approach3_64batch_nolabelsmoothing.pkl


In [None]:
#table 10: approach 3, 128 batch
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_10_file = "result_adam_approach3_128batch_nolabelsmoothing.pkl"
table_10 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_10_file}")
print(table_10_file)

result_adam_approach3_128batch_nolabelsmoothing.pkl


In [None]:
#table 11: 32 batch, inverse time decay, 0.0 dropout
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_11_file = "result_adam_32batch_nolabelsmoothing_inverse_time_decay_0.0dropout.pkl"
table_11 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_11_file}")
print(table_11_file)

result_adam_32batch_nolabelsmoothing_inverse_time_decay_0.0dropout.pkl


In [None]:
#table 12: 64 batch, inverse time decay, 0.0 dropout
# The file name is pinned to the one this cell printed when it was last run, instead of being derived from
# initialize_notebook_variables, so a fresh "Restart & Run All" loads this table whatever the settings class holds.
table_12_file = "result_adam_64batch_nolabelsmoothing_inverse_time_decay_0.0dropout.pkl"
table_12 = pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/{table_12_file}")
print(table_12_file)

result_adam_64batch_nolabelsmoothing_inverse_time_decay_0.0dropout.pkl


In [None]:
# Keep only the first row of each of the twelve result tables.
# NOTE(review): presumably the first row is the best model because the tables were saved sorted by Hamming loss -- confirm.
(
    table_1_top, table_2_top, table_3_top, table_4_top,
    table_5_top, table_6_top, table_7_top, table_8_top,
    table_9_top, table_10_top, table_11_top, table_12_top,
) = (
    scores_table.head(1)
    for scores_table in (
        table_1, table_2, table_3, table_4,
        table_5, table_6, table_7, table_8,
        table_9, table_10, table_11, table_12,
    )
)

In [None]:
# Stack the first rows of the twelve result tables (in table order, not re-sorted) and persist the summary.
table_final = pd.concat([
    table_1_top, table_2_top, table_3_top, table_4_top,
    table_5_top, table_6_top, table_7_top, table_8_top,
    table_9_top, table_10_top, table_11_top, table_12_top,
])
table_final.to_pickle("/content/drive/MyDrive/AttentionLayer/metric_score_results_second_round/final_scores_table.pkl")
table_final

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
11,classification_attention_layer_model-11,150,64,0.01,10,0.035385,0.048975,0.004346,676.0,0.068987,0.979903,0.980988,0.992807,0.994451,-0.001586,0.097234
3,classification_attention_layer_model-3,100,64,0.001,10,0.022153,0.048798,0.004064,633.0,0.064598,0.981186,0.982155,0.993014,0.994538,-0.001385,0.097032
6,classification_attention_layer_model-6,150,64,0.01,10,0.025574,0.049182,0.004688,718.0,0.073273,0.978198,0.979002,0.98941,0.992017,-0.000635,0.09628
5,classification_attention_layer_model-5,150,64,0.001,10,0.027518,0.048844,0.004718,741.0,0.07562,0.978069,0.978392,0.989541,0.991853,-0.000715,0.09636
11,classification_attention_layer_model-11,150,128,0.01,10,0.035431,0.049031,0.004989,774.0,0.078988,0.976912,0.978427,0.990721,0.992814,-0.001432,0.097079
5,classification_attention_layer_model-5,150,128,0.001,10,0.025842,0.048856,0.004802,757.0,0.077253,0.977604,0.977969,0.987842,0.990298,-0.000151,0.095794
6,classification_attention_layer_model-6,150,128,0.01,10,0.029332,0.048898,0.004304,677.0,0.069089,0.980065,0.9805,0.992189,0.99383,-0.00131,0.096957
3,classification_attention_layer_model-3,100,32,0.001,10,0.032109,0.048942,0.005481,815.0,0.083172,0.974233,0.974815,0.98253,0.986674,0.001208,0.094438
2,classification_attention_layer_model-2,50,64,0.01,10,0.035318,0.048866,0.0047,733.0,0.074804,0.97811,0.979336,0.988713,0.99148,-0.000382,0.096025
2,classification_attention_layer_model-2,50,128,0.01,10,0.037176,0.049066,0.005505,821.0,0.083784,0.974212,0.974322,0.984021,0.987076,0.00061,0.095034


In [None]:
# 64 batch: inverse time decay.
# The fixed-size unpacking expects exactly twelve loaded models / score tables.
(model_one, model_two, model_three, model_four, model_five, model_six,
 model_seven, model_eight, model_nine, model_ten, model_eleven, model_twelve) = list_models
(df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six,
 df_scores_seven, df_scores_eight, df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve) = list_df

# Stack the per-model score tables, renumber the rows, then order them by Hamming loss (ties: zero-one loss).
frames_adam_64batch_inverse_decay = (
    pd.concat(list_df)
    .reset_index(drop=True)
    .sort_values(by=['Hamming Loss', 'Zero_one Loss'])
)
# The index was reset before sorting, so after this shift it is each table's 1-based position in list_df.
frames_adam_64batch_inverse_decay.index = frames_adam_64batch_inverse_decay.index + 1

frames_adam_64batch_inverse_decay.to_pickle(
    "/content/drive/MyDrive/AttentionLayer/"
    f"attention_layer_{initialize_notebook_variables.approach_type}_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}_{initialize_notebook_variables.dropout_rate}/"
    f"result_adam_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}.pkl"
)
frames_adam_64batch_inverse_decay # Note: The best model should match the best performed model from tensorboard visualizations.

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
5,classification_attention_layer_model-5,100,64,0.001,10,0.021501,0.048988,0.004286,673.0,0.06868,0.980105,0.981187,0.991262,0.993406,-0.000941,0.096586
10,classification_attention_layer_model-10,150,64,0.001,20,0.023315,0.049054,0.00461,725.0,0.073987,0.978562,0.979259,0.989626,0.991898,-0.00064,0.096284
9,classification_attention_layer_model-9,150,64,0.001,10,0.030323,0.049083,0.004832,750.0,0.076538,0.977507,0.977934,0.988614,0.990865,-0.000476,0.09612
11,classification_attention_layer_model-11,150,64,0.01,10,0.037596,0.04904,0.00494,778.0,0.079396,0.977066,0.978253,0.989515,0.99189,-0.000926,0.096571
8,classification_attention_layer_model-8,100,64,0.01,20,0.035931,0.049123,0.005121,790.0,0.08062,0.976236,0.977679,0.989168,0.991658,-0.000973,0.096618
7,classification_attention_layer_model-7,100,64,0.01,10,0.035722,0.049095,0.005187,812.0,0.082866,0.975838,0.975744,0.987306,0.989576,-0.00033,0.095974
6,classification_attention_layer_model-6,100,64,0.001,20,0.027261,0.048943,0.005541,857.0,0.087458,0.974091,0.975228,0.98474,0.988361,0.000297,0.095346
3,classification_attention_layer_model-3,50,64,0.01,10,0.037154,0.048943,0.005943,893.0,0.091132,0.972437,0.974135,0.987302,0.989831,-0.001081,0.096727
2,classification_attention_layer_model-2,50,64,0.001,20,0.030402,0.048859,0.006189,926.0,0.094499,0.970931,0.972015,0.981098,0.985638,0.001046,0.094599
1,classification_attention_layer_model-1,50,64,0.001,10,0.034929,0.048835,0.006441,981.0,0.100112,0.969813,0.971235,0.9814,0.985603,0.000676,0.094968


In [None]:
# 128 batch: piecewise constant decay.
# NOTE: the result variable keeps its "64batch" name although the header refers to the 128-batch run.
# The fixed-size unpacking expects exactly six loaded models / score tables.
(model_one, model_two, model_three,
 model_four, model_five, model_six) = list_models
(df_scores_one, df_scores_two, df_scores_three,
 df_scores_four, df_scores_five, df_scores_six) = list_df

# Stack the per-model score tables, renumber the rows, then order them by Hamming loss (ties: zero-one loss).
frames_adam_64batch_piecewise_decay = (
    pd.concat(list_df)
    .reset_index(drop=True)
    .sort_values(by=['Hamming Loss', 'Zero_one Loss'])
)
# The index was reset before sorting, so after this shift it is each table's 1-based position in list_df.
frames_adam_64batch_piecewise_decay.index = frames_adam_64batch_piecewise_decay.index + 1

frames_adam_64batch_piecewise_decay.to_pickle(
    "/content/drive/MyDrive/AttentionLayer/"
    f"attention_layer_{initialize_notebook_variables.approach_type}_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}/"
    f"result_adam_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}.pkl"
)
frames_adam_64batch_piecewise_decay # Note: The best model should match the best performed model from tensorboard visualizations.

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
6,classification_attention_layer_model-6,150,128,0.01,10,0.029332,0.048898,0.004304,677.0,0.069089,0.980065,0.9805,0.992189,0.99383,-0.00131,0.096957
5,classification_attention_layer_model-5,150,128,0.001,10,0.024636,0.048946,0.004694,720.0,0.073477,0.978116,0.97868,0.988297,0.990962,-0.000217,0.09586
4,classification_attention_layer_model-4,100,128,0.01,10,0.029661,0.048977,0.004724,727.0,0.074191,0.978053,0.978079,0.989759,0.991816,-0.000804,0.096449
3,classification_attention_layer_model-3,100,128,0.001,10,0.026106,0.049055,0.00479,753.0,0.076845,0.977666,0.978128,0.987996,0.990478,-0.000198,0.095842
2,classification_attention_layer_model-2,50,128,0.01,10,0.027842,0.049185,0.005007,768.0,0.078375,0.976663,0.977499,0.98753,0.990371,-0.000236,0.095879
1,classification_attention_layer_model-1,50,128,0.001,10,0.048578,0.049553,0.012588,1830.0,0.186754,0.940346,0.943686,0.960499,0.969762,0.002544,0.09311


In [None]:
# 128 batch: piecewise constant decay, shorter boundaries.
# NOTE(review): the recorded output of this cell shows batch tag 64, so the "128 batch" header looks stale -- confirm.
# The fixed-size unpacking expects exactly six loaded models / score tables.
(model_one, model_two, model_three,
 model_four, model_five, model_six) = list_models
(df_scores_one, df_scores_two, df_scores_three,
 df_scores_four, df_scores_five, df_scores_six) = list_df

# Stack the per-model score tables, renumber the rows, then order them by Hamming loss (ties: zero-one loss).
frames_adam_64batch_piecewise_decay = (
    pd.concat(list_df)
    .reset_index(drop=True)
    .sort_values(by=['Hamming Loss', 'Zero_one Loss'])
)
# The index was reset before sorting, so after this shift it is each table's 1-based position in list_df.
frames_adam_64batch_piecewise_decay.index = frames_adam_64batch_piecewise_decay.index + 1

frames_adam_64batch_piecewise_decay.to_pickle(
    "/content/drive/MyDrive/AttentionLayer/"
    f"attention_layer_{initialize_notebook_variables.approach_type}_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}/"
    f"result_adam_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}.pkl"
)
frames_adam_64batch_piecewise_decay # Note: The best model should match the best performed model from tensorboard visualizations.

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
6,classification_attention_layer_model-6,150,64,0.01,10,0.025574,0.049182,0.004688,718.0,0.073273,0.978198,0.979002,0.98941,0.992017,-0.000635,0.09628
5,classification_attention_layer_model-5,150,64,0.001,10,0.025018,0.049235,0.004964,768.0,0.078375,0.976833,0.977876,0.987134,0.990286,-4.2e-05,0.095686
3,classification_attention_layer_model-3,50,64,0.001,10,0.029001,0.04941,0.005049,761.0,0.077661,0.9765,0.97728,0.988024,0.990769,-0.000466,0.09611
4,classification_attention_layer_model-4,50,64,0.01,10,0.027031,0.049312,0.005091,793.0,0.080927,0.976209,0.977186,0.986151,0.98929,0.000208,0.095436
1,classification_attention_layer_model-1,10,64,0.001,10,0.067805,0.049176,0.019588,2668.0,0.272273,0.904044,0.909276,0.928049,0.943966,0.008143,0.087603
2,classification_attention_layer_model-2,10,64,0.01,10,0.085481,0.049543,0.02472,3296.0,0.336361,0.878086,0.884955,0.911835,0.933059,0.009272,0.086503


In [None]:
# 128 batch: Piecewise Constant Decay Even Shorter Boundaries
# NOTE: the result variable keeps its "64batch" name although this cell is labelled as the 128-batch run.

# Give each of the six imported models / score tables its own name
# (the unpacking raises if the lists do not hold exactly six items).
(model_one, model_two, model_three,
 model_four, model_five, model_six) = list_models
(df_scores_one, df_scores_two, df_scores_three,
 df_scores_four, df_scores_five, df_scores_six) = list_df

# Stack the per-model tables, renumber the rows, then rank by Hamming loss
# with zero-one loss as the tie-breaker.
frames_adam_64batch_piecewise_decay = (
    pd.concat([df_scores_one, df_scores_two, df_scores_three,
               df_scores_four, df_scores_five, df_scores_six])
    .reset_index(drop=True)
    .sort_values(by=['Hamming Loss', 'Zero_one Loss'])
)

# Shift the row labels by one so they start at 1 instead of 0.
frames_adam_64batch_piecewise_decay.index = frames_adam_64batch_piecewise_decay.index + 1

# Persist the ranked table on Google Drive.
frames_adam_64batch_piecewise_decay.to_pickle(
    f"/content/drive/MyDrive/AttentionLayer/"
    f"attention_layer_{initialize_notebook_variables.approach_type}"
    f"_{initialize_notebook_variables.batch_size_value}"
    f"_{initialize_notebook_variables.labelsmoothing_value}"
    f"_{initialize_notebook_variables.learning_rate_scheduler}/"
    f"{f'result_adam_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}_{initialize_notebook_variables.learning_rate_scheduler}'}.pkl"
)

# Note: The best model should match the best performed model from tensorboard visualizations.
frames_adam_64batch_piecewise_decay

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
5,classification_attention_layer_model-5,150,128,0.001,10,0.025842,0.048856,0.004802,757.0,0.077253,0.977604,0.977969,0.987842,0.990298,-0.000151,0.095794
6,classification_attention_layer_model-6,150,128,0.01,10,0.027747,0.04885,0.004838,771.0,0.078681,0.97741,0.977453,0.987254,0.989802,3.8e-05,0.095606
3,classification_attention_layer_model-3,100,128,0.001,10,0.031686,0.048879,0.005319,821.0,0.083784,0.975192,0.975514,0.986418,0.989051,-0.000123,0.095766
4,classification_attention_layer_model-4,100,128,0.01,10,0.031276,0.048868,0.005361,836.0,0.085315,0.974931,0.975079,0.985162,0.988295,0.000316,0.095327
2,classification_attention_layer_model-2,50,128,0.01,10,0.029254,0.048919,0.005853,862.0,0.087968,0.972659,0.974333,0.984368,0.988179,0.000127,0.095516
1,classification_attention_layer_model-1,50,128,0.001,10,0.027887,0.048859,0.006393,971.0,0.099092,0.969952,0.971636,0.980268,0.984967,0.00116,0.094486


In [None]:
# Display the metric-score table stored in `df_object_inverse_time_decay`
# (created outside this part of the notebook; presumably the inverse-time-decay run - confirm).
df_object_inverse_time_decay

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce misclassification,F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
0,classification_attention_layer_model-1,50,32,0.001,10,0.034748,0.048985,0.007972,1144.0,0.116747,0.962256,0.963804,0.972948,0.979722,0.002396,0.093257


In [None]:
# Display the metric-score table stored in `df_object_exponential_decay`
# (created outside this part of the notebook; presumably the exponential-decay run - confirm).
df_object_exponential_decay

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
0,classification_attention_layer_model-1,50,32,0.001,10,0.043349,0.049251,0.009713,1405.0,0.9537,0.956516,0.965463,0.974008,0.00354,0.092124


In [None]:
# Display the metric-score table stored in `df_object_polynomial_decay`
# (created outside this part of the notebook; presumably the polynomial-decay run - confirm).
df_object_polynomial_decay

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
0,classification_attention_layer_model-1,50,32,0.001,10,0.047539,0.048919,0.010751,1570.0,0.948167,0.94705,0.957903,0.967099,0.005447,0.090243


In [None]:
# Display the metric-score table stored in `df_object_piecewise_constant_decay`
# (created outside this part of the notebook; presumably the piecewise-constant-decay run - confirm).
df_object_piecewise_constant_decay

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
0,classification_attention_layer_model-1,50,32,0.001,10,0.02837,0.04875,0.005901,913.0,0.972378,0.974431,0.98333,0.987632,0.000477,0.095167


In [None]:
# Display the metric-score table stored in `df_object_no_decay`
# (created outside this part of the notebook; presumably the run without learning-rate decay - confirm).
df_object_no_decay

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce misclassification,F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
0,classification_attention_layer_model-1,50,32,0.001,10,0.035577,0.048847,0.009377,1377.0,0.140525,0.955481,0.957845,0.968068,0.976012,0.002866,0.092791


In [None]:
# Load the pickled per-epoch metrics table of the model saved with
# 50-dim embedding, batch size 32, learning rate 0.001 and decay multiplier 10.
history_dataframe_piecewise_constant_decay = pd.read_pickle(
    f"{folder_path_model_saved}/"
    f"metrics_histogram_classification_attention_layer_"
    f"{50}dim_{32}batchsize_{0.001}lr_{10}decaymultiplier_"
    f"{saved_model_date}.pkl"
)

In [None]:
# Display the loaded per-epoch training/validation metrics table.
history_dataframe_piecewise_constant_decay

Unnamed: 0,loss,hamming_loss,f1_score_micro,f1_score_none,f1_score_macro,val_loss,val_hamming_loss,val_f1_score_micro,val_f1_score_none,val_f1_score_macro,epoch
1,0.321761,0.094573,0.431774,"[0.02782071, 0.018044027, 0.0, 0.0, 0.5701574,...",0.121993,0.220275,0.064282,0.613656,"[0.0, 0.0, 0.0, 0.0, 0.83583605, 0.0, 0.939264...",0.230908,1
2,0.203607,0.065014,0.609358,"[0.2375, 0.0, 0.0, 0.0, 0.8748191, 0.0, 0.9344...",0.232531,0.180767,0.059687,0.641274,"[0.5081561, 0.0, 0.0, 0.0, 0.81845385, 0.0, 0....",0.256285,2
3,0.168691,0.06034,0.637461,"[0.48584092, 0.0016090106, 0.0, 0.0, 0.8786994...",0.257939,0.147926,0.057945,0.651744,"[0.537057, 0.0, 0.0, 0.0, 0.81872255, 0.0, 0.9...",0.271879,3
4,0.140534,0.057593,0.653913,"[0.5422905, 0.07028112, 0.0942623, 0.0, 0.8889...",0.28758,0.124546,0.055167,0.668442,"[0.55095184, 0.13691932, 0.4342105, 0.0, 0.861...",0.316275,4
5,0.118214,0.054268,0.673909,"[0.5967223, 0.121561974, 0.544008, 0.016794961...",0.352252,0.100711,0.051337,0.691457,"[0.63925236, 0.17150398, 0.63883847, 0.0, 0.96...",0.388432,5
6,0.099244,0.052351,0.685519,"[0.6206474, 0.13651137, 0.6020408, 0.16041397,...",0.396619,0.08553,0.050376,0.697234,"[0.6534898, 0.16, 0.6679462, 0.25853658, 0.836...",0.411849,6
7,0.085954,0.051534,0.690356,"[0.6282966, 0.14051692, 0.56971514, 0.37233427...",0.418037,0.073683,0.050466,0.696692,"[0.62965304, 0.14076246, 0.59677416, 0.4263736...",0.41307,7
8,0.076089,0.051235,0.692169,"[0.62515485, 0.13441058, 0.5737293, 0.4451754,...",0.432411,0.064628,0.050031,0.69931,"[0.6446384, 0.13469985, 0.6104418, 0.40178573,...",0.430166,8
9,0.067986,0.050857,0.6944,"[0.6219856, 0.14589444, 0.57963705, 0.4771136,...",0.448537,0.057724,0.049926,0.699941,"[0.63522017, 0.13333334, 0.42727274, 0.6031128...",0.436575,9
10,0.062014,0.050536,0.6963,"[0.62879497, 0.15226185, 0.55885357, 0.5290456...",0.459975,0.052321,0.04964,0.701656,"[0.51775956, 0.13670133, 0.41647598, 0.5949119...",0.441641,10


#### Import the first 36 models trained by the Adam Optimizer - Keras custom neural network

Run this cell only if model_method_creation="adam"

In [None]:
# Give each of the six imported models / score tables its own name
# (the unpacking raises if the lists do not hold exactly six items).
(model_one, model_two, model_three,
 model_four, model_five, model_six) = list_models
(df_scores_one, df_scores_two, df_scores_three,
 df_scores_four, df_scores_five, df_scores_six) = list_df

# Stack the per-model tables, renumber the rows, then rank by Hamming loss
# with zero-one loss as the tie-breaker.
frames_adam_32batch_10multiplier = (
    pd.concat([df_scores_one, df_scores_two, df_scores_three,
               df_scores_four, df_scores_five, df_scores_six])
    .reset_index(drop=True)
    .sort_values(by=['Hamming Loss', 'Zero_one Loss'])
)

# Shift the row labels by one so they start at 1 instead of 0.
frames_adam_32batch_10multiplier.index = frames_adam_32batch_10multiplier.index + 1

# This table is deliberately not pickled here; the disabled call that used to sit
# at this point wrote a differently named frame (`result_adam_32batch`).

# Note: The best model should match the best performed model from tensorboard visualizations.
frames_adam_32batch_10multiplier

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
2,classification_attention_layer_model-2,50,128,0.01,10,0.037176,0.049066,0.005505,821.0,0.083784,0.974212,0.974322,0.984021,0.987076,0.00061,0.095034
4,classification_attention_layer_model-4,100,128,0.01,10,0.048551,0.049048,0.00793,1185.0,0.120931,0.962299,0.96553,0.97127,0.978868,0.003092,0.092567
5,classification_attention_layer_model-5,150,128,0.001,10,0.040042,0.049054,0.00811,1248.0,0.12736,0.961515,0.960018,0.971613,0.977248,0.002774,0.092881
3,classification_attention_layer_model-3,100,128,0.001,10,0.037694,0.049055,0.008908,1342.0,0.136953,0.957564,0.954925,0.967862,0.973852,0.003424,0.092238
6,classification_attention_layer_model-6,150,128,0.01,10,0.051789,0.049049,0.009059,1396.0,0.142464,0.95685,0.957189,0.967482,0.974592,0.003419,0.092243
1,classification_attention_layer_model-1,50,128,0.001,10,0.070723,0.049062,0.017655,2499.0,0.255026,0.915665,0.918502,0.943804,0.956149,0.003892,0.091775


In [None]:
# Display the ranked score table for the runs with decay multiplier 20.
# NOTE(review): `frames_adam_32batch_20multiplier` is not assigned in any visible cell;
# it must already exist in the kernel - confirm how it is produced.
frames_adam_32batch_20multiplier

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
2,classification_attention_layer_model-2,50,128,0.01,20,0.04539,0.048974,0.006976,989.0,0.100929,0.967455,0.971124,0.981964,0.986521,-7.5e-05,0.095719
4,classification_attention_layer_model-4,100,128,0.01,20,0.05046,0.048968,0.007096,1063.0,0.10848,0.966419,0.967859,0.975412,0.98101,0.002329,0.093324
5,classification_attention_layer_model-5,150,128,0.001,20,0.035031,0.048973,0.007402,1127.0,0.115012,0.964977,0.957692,0.974698,0.977035,0.002295,0.093357
3,classification_attention_layer_model-3,100,128,0.001,20,0.035805,0.04898,0.007996,1244.0,0.126952,0.962243,0.963835,0.974044,0.980081,0.001946,0.093704
6,classification_attention_layer_model-6,150,128,0.01,20,0.053285,0.04897,0.009245,1366.0,0.139402,0.955882,0.959102,0.966194,0.97508,0.003733,0.091933
1,classification_attention_layer_model-1,50,128,0.001,20,0.043897,0.048972,0.01125,1680.0,0.171446,0.946041,0.940446,0.958783,0.96523,0.004585,0.091092


In [None]:
# Append the decay-multiplier-20 table below the decay-multiplier-10 table
# (row labels are kept as they are, so they repeat across the two halves).
final_result = pd.concat(
    [
        frames_adam_32batch_10multiplier,
        frames_adam_32batch_20multiplier,
    ]
)

# Persist the combined table on Google Drive.
final_result.to_pickle(
    f"/content/drive/MyDrive/AttentionLayer/"
    f"attention_layer_{initialize_notebook_variables.approach_type}"
    f"_{initialize_notebook_variables.batch_size_value}"
    f"_{initialize_notebook_variables.labelsmoothing_value}/"
    f"{f'result_adam_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}'}.pkl"
)

In [None]:
# Display the combined score table (decay multiplier 10 rows followed by decay multiplier 20 rows).
final_result

Unnamed: 0,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,Zero_one Loss (perce),F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
2,classification_attention_layer_model-2,50,64,0.01,10,0.035318,0.048866,0.0047,733.0,0.074804,0.97811,0.979336,0.988713,0.99148,-0.000382,0.096025
4,classification_attention_layer_model-4,100,64,0.01,10,0.042303,0.048955,0.004802,739.0,0.075416,0.977645,0.977351,0.988655,0.990602,-0.000462,0.096105
3,classification_attention_layer_model-3,100,64,0.001,10,0.024138,0.04884,0.00488,757.0,0.077253,0.977107,0.977888,0.985085,0.988655,0.000828,0.094817
1,classification_attention_layer_model-1,50,64,0.001,10,0.030873,0.048767,0.006375,964.0,0.098377,0.969883,0.969812,0.978084,0.982923,0.002023,0.093628
5,classification_attention_layer_model-5,150,64,0.001,10,0.030027,0.048921,0.006453,982.0,0.100214,0.969416,0.970966,0.976486,0.982558,0.002563,0.093091
6,classification_attention_layer_model-6,150,64,0.01,10,0.051989,0.048945,0.006687,1039.0,0.106031,0.968528,0.969854,0.97892,0.983816,0.001383,0.094263
2,classification_attention_layer_model-2,50,64,0.01,20,0.042315,0.048926,0.005799,897.0,0.09154,0.972815,0.974183,0.982944,0.987263,0.000728,0.094916
5,classification_attention_layer_model-5,150,64,0.001,20,0.033051,0.048948,0.005835,897.0,0.09154,0.972572,0.973283,0.981666,0.98585,0.001184,0.094462
3,classification_attention_layer_model-3,100,64,0.001,20,0.036165,0.048915,0.007348,1106.0,0.112869,0.96513,0.965485,0.973569,0.979631,0.002789,0.092867
4,classification_attention_layer_model-4,100,64,0.01,20,0.050231,0.048955,0.00745,1109.0,0.113175,0.964811,0.965469,0.975337,0.980635,0.001999,0.093651


In [None]:
#32 batch: Approach 1k
model_one, model_two, model_three, model_four, model_five, model_six, model_seven, model_eight, model_nine = list_models[0:9]
model_ten, model_eleven, model_twelve, model_thirteen, model_fourteen, model_fifteen, model_sixteen, model_seventeen, model_eighteen = list_models[9:]

df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight, df_scores_nine = list_df[0:9]
df_scores_ten, df_scores_eleven, df_scores_twelve, df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen = list_df[9:]

frames_adam_32batch=[df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight,
                 df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve, df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen]

result_adam_32batch=pd.concat(frames_adam_32batch)
result_adam_32batch=result_adam_32batch.reset_index(drop=True)
result_adam_32batch=result_adam_32batch.sort_values(by=['Hamming Loss', 'Zero_one Loss'])
result_adam_32batch.index += 1
result_adam_32batch.to_pickle(f"/content/drive/MyDrive/AttentionLayer/attention_layer_{initialize_notebook_variables.approach_type}_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}/{f'result_adam_{initialize_notebook_variables.batch_size_value}_{initialize_notebook_variables.labelsmoothing_value}'}.pkl")
result_adam_32batch # Note: The best model should match the best performed model from tensorboard visualizations.

In [None]:
# Reload the saved 32-batch result tables of the three approaches from Google Drive.
batch32_approach1, batch32_approach2, batch32_approach3 = (
    pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/attention_layer_approach1/{'result_adam_32batch_approach1'}.pkl"),
    pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/attention_layer_approach2/{'result_adam_32batch_approach2'}.pkl"),
    pd.read_pickle(f"/content/drive/MyDrive/AttentionLayer/attention_layer_approach3/{'result_adam_32batch_approach3'}.pkl"),
)

In [None]:
# Display the reloaded 32-batch result table for approach 1.
batch32_approach1

In [None]:
# Display the reloaded 32-batch result table for approach 2.
batch32_approach2

In [None]:
# Display the reloaded 32-batch result table for approach 3.
batch32_approach3

In [None]:
#32 batch: Approach 2
model_one, model_two, model_three, model_four, model_five, model_six, model_seven, model_eight, model_nine = list_models[0:9]
model_ten, model_eleven, model_twelve, model_thirteen, model_fourteen, model_fifteen, model_sixteen, model_seventeen, model_eighteen = list_models[9:]

df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight, df_scores_nine = list_df[0:9]
df_scores_ten, df_scores_eleven, df_scores_twelve, df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen = list_df[9:]

frames_adam_32batch=[df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight,
                 df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve, df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen]

result_adam_32batch=pd.concat(frames_adam_32batch)
result_adam_32batch=result_adam_32batch.reset_index(drop=True)
result_adam_32batch=result_adam_32batch.sort_values(by=['Hamming Loss', 'Zero_one Loss'])
result_adam_32batch.index += 1
result_adam_32batch.to_pickle(f"/content/drive/MyDrive/AttentionLayer/attention_layer_approach2_13_12_2020/{'result_adam_32batch_approach2'}_{datetime.now()}.pkl")
result_adam_32batch # Note: The best model should match the best performed model from tensorboard visualizations.

Compare the models from the 11 different metric-scores dataframes trained on the Attention layer neural networks.

In [None]:
# Reload the eleven saved metric-score tables, one per training configuration,
# in the same order as the `df_score1` ... `df_score11` names they are bound to.
(df_score1, df_score2, df_score3, df_score4, df_score5, df_score6,
 df_score7, df_score8, df_score9, df_score10, df_score11) = map(
    pd.read_pickle,
    (
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_32batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach2_32batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach3_32batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_64batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_128batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_256batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_200embedding_64batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_300embedding_64batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach2_64batch_nolabelsmoothing.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_64batch.pkl",
        "/content/drive/MyDrive/AttentionLayer/metric_score_results/result_adam_approach1_64batch_0.1labelsmoothing.pkl",
    ),
)

In [None]:
# Keep only the first row of every table, i.e. the top-ranked model when the
# saved table was sorted before pickling.
(df_score1, df_score2, df_score3, df_score4, df_score5, df_score6,
 df_score7, df_score8, df_score9, df_score10, df_score11) = (
    ranked_scores.head(1)
    for ranked_scores in (
        df_score1, df_score2, df_score3, df_score4, df_score5, df_score6,
        df_score7, df_score8, df_score9, df_score10, df_score11,
    )
)

In [None]:
# Stack the eleven single-row tables; `reset_index()` keeps each row's previous
# label as an "index" column and numbers the rows from 0.
df_score_final = (
    pd.concat(
        [
            df_score1, df_score2, df_score3, df_score4, df_score5, df_score6,
            df_score7, df_score8, df_score9, df_score10, df_score11,
        ]
    )
    .reset_index()
)

# Persist the comparison table (the target file name has no extension).
df_score_final.to_pickle(
    "/content/drive/MyDrive/AttentionLayer/metric_score_results/final_dataframe_all_models_trained"
)

df_score_final

Unnamed: 0,index,Tag Name,Embedding Dimension tag,Batch tag,Learning Rate tag,Decay Multiplier tag,Test Loss,Test Hamming Loss,Hamming Loss,Zero_one Loss,F1_score,F1_score_samples,ROC_score,ROC_score_samples,Bias,Variance
0,13,classification_attention_layer_model-13,150,32,0.001,10,0.023548,0.060081,0.004496,698.0,0.979081,0.980495,0.98969,0.992377,-0.000551,0.096195
1,15,classification_attention_layer_model-15,150,32,0.01,10,0.040359,0.057421,0.004778,745.0,0.977861,0.979594,0.99079,0.993137,-0.00125,0.096896
2,15,classification_attention_layer_model-15,150,32,0.01,10,0.047936,0.058783,0.004862,758.0,0.977475,0.979324,0.990644,0.993051,-0.001278,0.096924
3,7,classification_attention_layer_model-7,100,64,0.001,10,0.022314,0.049645,0.00455,717.0,0.978887,0.97984,0.990745,0.992787,-0.001006,0.096651
4,14,classification_attention_layer_model-14,150,128,0.001,20,0.025769,0.05041,0.004454,697.0,0.979376,0.980455,0.991933,0.993862,-0.001362,0.097008
5,16,classification_attention_layer_model-16,150,256,0.01,20,0.039171,0.049367,0.004946,761.0,0.977011,0.977824,0.988994,0.991706,-0.000734,0.096379
6,1,classification_attention_layer_model-1,200,64,0.001,10,0.026594,0.048979,0.00452,716.0,0.978989,0.979034,0.990046,0.991879,-0.000711,0.096355
7,2,classification_attention_layer_model-2,300,64,0.001,20,0.025427,0.048744,0.004502,701.0,0.979107,0.979708,0.990821,0.992887,-0.000987,0.096633
8,7,classification_attention_layer_model-7,100,64,0.001,10,0.026359,0.05099,0.005073,786.0,0.976233,0.97716,0.985077,0.988554,0.000638,0.095006
9,9,classification_attention_layer_model-9,100,64,0.01,10,0.338576,0.05015,0.00401,638.0,0.981523,0.982415,0.995264,0.996116,-0.002182,0.097834


----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# Bind the first 36 imported models to individual names, twelve at a time.
# NOTE: `model_twenty_thirty` holds the 30th model; the name is kept unchanged
# in case other cells refer to it.
model_one, model_two, model_three, model_four, model_five, model_six, model_seven, model_eight, model_nine, model_ten, model_eleven, model_twelve=list_models[0:12]
model_thirteen, model_fourteen, model_fifteen, model_sixteen, model_seventeen, model_eighteen, model_nineteen, model_twenty, model_twenty_one, model_twenty_two, model_twenty_three, model_twenty_four=list_models[12:24]
model_twenty_five, model_twenty_six, model_twenty_seven, model_twenty_eight, model_twenty_nine, model_twenty_thirty, model_thirty_one, model_thirty_two, model_thirty_three, model_thirty_four, model_thirty_five, model_thirty_six=list_models[24:36]

# Same for the 36 per-model score tables.
df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight, df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve=list_df[0:12]
df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen, df_scores_nineteen, df_scores_twenty, df_scores_twenty_one, df_scores_twenty_two, df_scores_twenty_three, df_scores_twenty_four=list_df[12:24]
df_scores_twenty_five, df_scores_twenty_six, df_scores_twenty_seven, df_scores_twenty_eight, df_scores_twenty_nine, df_scores_thirty, df_scores_thirty_one, df_scores_thirty_two, df_scores_thirty_three, df_scores_thirty_four, df_scores_thirty_five, df_scores_thirty_six=list_df[24:36]

frames_adam_one=[df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight, df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve,
                 df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen, df_scores_nineteen, df_scores_twenty, df_scores_twenty_one, df_scores_twenty_two, df_scores_twenty_three, df_scores_twenty_four,
                 df_scores_twenty_five, df_scores_twenty_six, df_scores_twenty_seven, df_scores_twenty_eight, df_scores_twenty_nine, df_scores_thirty, df_scores_thirty_one, df_scores_thirty_two, df_scores_thirty_three, df_scores_thirty_four, df_scores_thirty_five, df_scores_thirty_six]

# Stack the tables and renumber the rows from 0.
result_adam_one=pd.concat(frames_adam_one)
result_adam_one=result_adam_one.reset_index(drop=True)

# Rank by Hamming loss, then zero-one loss. `sort_values` returns a new frame, so
# the result must be assigned back; otherwise only the displayed copy is ranked and
# every later use of `result_adam_one` (LaTeX export, pickle) sees the unranked table.
result_adam_one=result_adam_one.sort_values(by=['Hamming Loss', 'Zero_one Loss'])
result_adam_one

In [None]:
# Print the table as LaTeX source (row labels omitted) so it can be pasted into a report.
print(result_adam_one.to_latex(index=False))

In [None]:
# Persist the results table under <cwd>/<folder_path_model_saved>/.
# The folder and the file name are passed to os.path.join as separate components
# instead of being glued together with a hard-coded "\\": a backslash is only a path
# separator on Windows, so on Linux/Colab the old form produced a single file name
# containing a backslash. On Windows the resulting path is unchanged.
result_adam_one.to_pickle(
    os.path.join(
        os.getcwd(),
        folder_path_model_saved,
        "{0}_{1}dim_{2}batchsize_{3}lr_{4}decaymultiplier_{5}.pkl".format(
            "results_table_adam_one",
            str(100),    # Embedding size of the best model estimator
            str(32),     # Batch size of the best model estimator
            str(0.001),  # Learning rate of the best model estimator
            str(10),     # Decay steps multiplier of the best model estimator
            version_data_control,
        ),
    )
)

The best of the 36 models presented above is model 7, with:
* Embedding size: 100
* Batch size: 32
* Learning rate: 0.001
* Decay Steps Multiplier: 10
* Hamming loss & Zero-one loss: 0.003620 - 566.0

In [None]:
# The rest 18 models (37-54)
# Re-imports every trained model of the hyper-parameter grid below and collects, per
# model, the model object and its score table. Results accumulate in `list_models`
# and `list_df` in grid-iteration order.
# NOTE(review): `import_trained_keras_model` and `create_df_scoring_table` are defined
# elsewhere in the notebook; the meaning of their positional flags ("on"/"off") is
# assumed to be "decay-steps multiplier in use / not in use" - confirm.

# Start from empty collections so a re-run does not append to earlier results.
list_models=[]
list_df=[]

if model_method_creation=="adam":

    # Adam grid: 1 batch size x 3 embedding sizes x 3 learning rates x 2 decay multipliers = 18 runs.
    # Despite its name, HP_HIDDEN_UNITS carries the 'batch_size' hyper-parameter.
    HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([128]))
    HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50, 100, 150]))
    HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01, 0.1]))
    HP_DECAY_STEPS_MULTIPLIER = hp.HParam('decay_steps_multiplier', hp.Discrete([10, 20]))

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                for decay_steps_multiplier in HP_DECAY_STEPS_MULTIPLIER.domain.values:
                    # One concrete hyper-parameter combination, keyed by the HParam objects.
                    hparams = {
                        HP_HIDDEN_UNITS: batch_size,
                        HP_EMBEDDING_DIM: embedding_dim,
                        HP_LEARNING_RATE: learning_rate,
                        HP_DECAY_STEPS_MULTIPLIER: decay_steps_multiplier
                      }
                    # Progress indicator: "<current run>/<total runs in the grid>".
                    print("{0}/{1}".format(len(list_models)+1, (len(HP_HIDDEN_UNITS.domain.values)*len(HP_EMBEDDING_DIM.domain.values)*len(HP_LEARNING_RATE.domain.values)*len(HP_DECAY_STEPS_MULTIPLIER.domain.values))))
                    model_object=import_trained_keras_model("import custom trained model", "on", model_method_creation, hparams)
                    # The table is tagged "<saved_model_name>-<1-based position in the grid>".
                    df_object=create_df_scoring_table("import custom trained model", "on", "{0}-{1}".format(saved_model_name, len(list_models)+1), hparams, model_object)
                    list_models.append(model_object)
                    list_df.append(df_object)

else:
    # Any other optimizer: 3 batch sizes x 3 embedding sizes x 3 learning rates = 27 runs,
    # with no decay-steps multiplier in the grid.
    HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32, 64, 128]))
    HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50, 100, 150]))
    HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01, 0.1]))

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                hparams = {
                    HP_HIDDEN_UNITS: batch_size,
                    HP_EMBEDDING_DIM: embedding_dim,
                    HP_LEARNING_RATE: learning_rate
                  }
                # Progress indicator: "<current run>/<total runs in the grid>".
                print("{0}/{1}".format(len(list_models)+1, (len(HP_HIDDEN_UNITS.domain.values)*len(HP_EMBEDDING_DIM.domain.values)*len(HP_LEARNING_RATE.domain.values))))
                model_object=import_trained_keras_model("import custom trained model", "off", model_method_creation, hparams)
                df_object=create_df_scoring_table("import custom trained model", "off", "{0}-{1}".format(saved_model_name, len(list_models)+1), hparams, model_object)
                list_models.append(model_object)
                list_df.append(df_object)

#### Import the remaining 18 models trained with the Adam optimizer - Keras custom neural network

Run this cell only if model_method_creation="adam"

In [None]:
# Bind the 18 imported models to individual names.
model_one, model_two, model_three, model_four, model_five, model_six, model_seven, model_eight, model_nine, model_ten, model_eleven, model_twelve=list_models[0:12]
model_thirteen, model_fourteen, model_fifteen, model_sixteen, model_seventeen, model_eighteen=list_models[12:18]

# Same for the 18 per-model score tables.
df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight, df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve=list_df[0:12]
df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen=list_df[12:18]

frames_adam_two=[df_scores_one, df_scores_two, df_scores_three, df_scores_four, df_scores_five, df_scores_six, df_scores_seven, df_scores_eight, df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve,
                 df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen, df_scores_seventeen, df_scores_eighteen]

# Stack the tables and renumber the rows from 0.
result_adam_two=pd.concat(frames_adam_two)
result_adam_two=result_adam_two.reset_index(drop=True)

# Rank by Hamming loss, then zero-one loss. `sort_values` returns a new frame, so
# the result must be assigned back; otherwise only the displayed copy is ranked and
# every later use of `result_adam_two` (LaTeX export, pickle) sees the unranked table.
result_adam_two=result_adam_two.sort_values(by=['Hamming Loss', 'Zero_one Loss'])
result_adam_two

In [None]:
# Print the table as LaTeX source (row labels omitted) so it can be pasted into a report.
print(result_adam_two.to_latex(index=False))

In [None]:
# Persist the results table under <cwd>/<folder_path_model_saved>/.
# The folder and the file name are passed to os.path.join as separate components
# instead of being glued together with a hard-coded "\\": a backslash is only a path
# separator on Windows, so on Linux/Colab the old form produced a single file name
# containing a backslash. On Windows the resulting path is unchanged.
result_adam_two.to_pickle(
    os.path.join(
        os.getcwd(),
        folder_path_model_saved,
        "{0}_{1}dim_{2}batchsize_{3}lr_{4}decaymultiplier_{5}.pkl".format(
            "results_table_adam_two",
            str(100),    # Embedding size of the best model estimator
            str(128),    # Batch size of the best model estimator
            str(0.001),  # Learning rate of the best model estimator
            str(20),     # Decay steps multiplier of the best model estimator
            version_data_control,
        ),
    )
)

The best of the first 36 models (1-36) presented above is model 7, with:
* Embedding size: 100
* Batch size: 32
* Learning rate: 0.001
* Decay Steps Multiplier: 10
* Hamming loss & Zero-one loss: 0.003620 - 566.0

The best of the remaining 18 models (37-54) presented above is model 44, with:
* Embedding size: 100
* Batch size: 128
* Learning rate: 0.001
* Decay Steps Multiplier: 20
* Hamming loss & Zero-one loss: 0.003986 - 622.0

The better of the two is model 7.

#### -----------------------------------------------------------------

#### Import all the models trained by the SGD Optimizer - Keras custom neural network

Run this cell only if model_method_creation="sgd"

In [None]:
# Names used when importing the SGD-trained models.
# Prefix of each model's tag: the import loop appends "-<n>" to it.
saved_model_name="multi_input_keras_model"
# Folder holding the saved SGD models. The separator is a Windows-style backslash.
# NOTE(review): on Linux/Colab this is not split into two directories - confirm the target platform.
folder_path_model_saved="model_one\\sgd_models_20072020"
# Presumably the base name of the saved metric-score tables - confirm where it is consumed.
saved_df_scored_metric_name="df_metrics_multi_input_keras_model"

In [None]:
# Select the optimizer family; any value other than "adam" makes the import cell
# below use the grid without a decay-steps multiplier.
model_method_creation="sgd"

In [None]:
# Re-imports every trained model of the hyper-parameter grid below and collects, per
# model, the model object and its score table. Results accumulate in `list_models`
# and `list_df` in grid-iteration order.
# NOTE(review): `import_trained_keras_model` and `create_df_scoring_table` are defined
# elsewhere in the notebook; the meaning of their positional flags ("on"/"off") is
# assumed to be "decay-steps multiplier in use / not in use" - confirm.

# Start from empty collections so a re-run does not append to earlier results.
list_models=[]
list_df=[]

if model_method_creation=="adam":

    # Adam grid: 3 batch sizes x 3 embedding sizes x 3 learning rates x 2 decay multipliers = 54 runs.
    # Despite its name, HP_HIDDEN_UNITS carries the 'batch_size' hyper-parameter.
    HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32, 64, 128]))
    HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50, 100, 150]))
    HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01, 0.1]))
    HP_DECAY_STEPS_MULTIPLIER = hp.HParam('decay_steps_multiplier', hp.Discrete([10, 20])) #only used in adam optimizer

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                for decay_steps_multiplier in HP_DECAY_STEPS_MULTIPLIER.domain.values:
                    # One concrete hyper-parameter combination, keyed by the HParam objects.
                    hparams = {
                        HP_HIDDEN_UNITS: batch_size,
                        HP_EMBEDDING_DIM: embedding_dim,
                        HP_LEARNING_RATE: learning_rate,
                        HP_DECAY_STEPS_MULTIPLIER: decay_steps_multiplier
                      }
                    # Progress indicator: "<current run>/<total runs in the grid>".
                    print("{0}/{1}".format(len(list_models)+1, (len(HP_HIDDEN_UNITS.domain.values)*len(HP_EMBEDDING_DIM.domain.values)*len(HP_LEARNING_RATE.domain.values)*len(HP_DECAY_STEPS_MULTIPLIER.domain.values))))
                    model_object=import_trained_keras_model("import custom trained model", "on", model_method_creation, hparams)
                    # The table is tagged "<saved_model_name>-<1-based position in the grid>".
                    df_object=create_df_scoring_table("import custom trained model", "on", "{0}-{1}".format(saved_model_name, len(list_models)+1), hparams, model_object)
                    list_models.append(model_object)
                    list_df.append(df_object)

else:
    # Any other optimizer (e.g. "sgd"): 3 batch sizes x 3 embedding sizes x 3 learning rates = 27 runs,
    # with no decay-steps multiplier in the grid.
    HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32, 64, 128]))
    HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50, 100, 150]))
    HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01, 0.1]))

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                hparams = {
                    HP_HIDDEN_UNITS: batch_size,
                    HP_EMBEDDING_DIM: embedding_dim,
                    HP_LEARNING_RATE: learning_rate
                  }
                # Progress indicator: "<current run>/<total runs in the grid>".
                print("{0}/{1}".format(len(list_models)+1, (len(HP_HIDDEN_UNITS.domain.values)*len(HP_EMBEDDING_DIM.domain.values)*len(HP_LEARNING_RATE.domain.values))))
                model_object=import_trained_keras_model("import custom trained model", "off", model_method_creation, hparams)
                df_object=create_df_scoring_table("import custom trained model", "off", "{0}-{1}".format(saved_model_name, len(list_models)+1), hparams, model_object)
                list_models.append(model_object)
                list_df.append(df_object)

In [None]:
# Give each of the 27 loaded SGD models its own name, following the grid-search order.
(model_one, model_two, model_three, model_four,
 model_five, model_six, model_seven, model_eight,
 model_nine, model_ten, model_eleven, model_twelve) = list_models[0:12]
(model_thirteen, model_fourteen, model_fifteen, model_sixteen,
 model_seventeen, model_eighteen, model_nineteen, model_twenty,
 model_twenty_one, model_twenty_two, model_twenty_three, model_twenty_four) = list_models[12:24]
(model_twenty_five, model_twenty_six, model_twenty_seven) = list_models[24:27]

# Same naming scheme for the per-model scoring tables.
(df_scores_one, df_scores_two, df_scores_three, df_scores_four,
 df_scores_five, df_scores_six, df_scores_seven, df_scores_eight,
 df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve) = list_df[0:12]
(df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen,
 df_scores_seventeen, df_scores_eighteen, df_scores_nineteen, df_scores_twenty,
 df_scores_twenty_one, df_scores_twenty_two, df_scores_twenty_three, df_scores_twenty_four) = list_df[12:24]
(df_scores_twenty_five, df_scores_twenty_six, df_scores_twenty_seven) = list_df[24:27]

# The 27 scoring tables in order (the unpacking above already requires list_df to hold at least 27).
# Despite its name, frames_glove holds the SGD tables here.
frames_glove = list_df[0:27]

# One row block per model, re-indexed from 0.
result_sgd = pd.concat(frames_glove).reset_index(drop=True)
# Displayed sorted by the two losses; result_sgd itself keeps the grid-search order.
result_sgd.sort_values(by=['Hamming Loss', 'Zero_one Loss'])

In [None]:
# Print the SGD results table as LaTeX source, omitting the row index.
print(result_sgd.to_latex(index=False))

In [None]:
# Persist the SGD results table; the file name records the hyper-parameters of the best SGD estimator.
results_sgd_file = "{0}\\{1}_{2}dim_{3}batchsize_{4}lr_{5}.pkl".format(
    folder_path_model_saved,
    "results_table_sgd",
    str(100),  # embedding size of the best model estimator
    str(64),   # batch size of the best model estimator
    str(0.1),  # learning rate of the best model estimator
    version_data_control)

result_sgd.to_pickle(os.path.join(os.getcwd(), results_sgd_file))

The best of the 27 models presented above is model 15, with:
* Embedding size: 100
* Batch size: 64
* Learning rate: 0.1
* Hamming loss & Zero-one loss: 0.009035 - 1363.0

#### -----------------------------------------------------------------

#### Import all the models trained by the RMSprop Optimizer - Keras custom neural network

Run this cell only if model_method_creation="rmsprop"

In [None]:
# Initialize the name variables used while loading the RMSprop-trained models.

# Name prefix used to tag each loaded RMSprop model in its scoring table.
saved_model_name="multi_input_keras_model"
# Folder of the RMSprop runs (Windows-style separators); read as a global by the helper functions of this notebook.
folder_path_model_saved="model_one\\rmsprop_models_20072020"
# NOTE(review): presumably the file-name prefix of the stored metric tables - not referenced in the visible cells, confirm.
saved_df_scored_metric_name="df_metrics_multi_input_keras_model"

In [None]:
# Optimizer whose models get loaded by the next cell; only "adam" adds the decay-steps multiplier grid.
model_method_creation="rmsprop"

In [None]:
# Trained models and their scoring tables, collected in grid-search order.
list_models=[]
list_df=[]

# These three grids are the same for every optimizer.
# NOTE: HP_HIDDEN_UNITS holds the *batch size* grid despite its name.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32, 64, 128]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50, 100, 150]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01, 0.1]))

if model_method_creation=="adam":

    # The decay-steps multiplier is only part of the grid for the adam optimizer.
    HP_DECAY_STEPS_MULTIPLIER = hp.HParam('decay_steps_multiplier', hp.Discrete([10]))

    # Size of the full grid, shown as the denominator of the progress print-out.
    total_models = (len(HP_HIDDEN_UNITS.domain.values)
                    * len(HP_EMBEDDING_DIM.domain.values)
                    * len(HP_LEARNING_RATE.domain.values)
                    * len(HP_DECAY_STEPS_MULTIPLIER.domain.values))

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                for decay_steps_multiplier in HP_DECAY_STEPS_MULTIPLIER.domain.values:
                    hparams = {
                        HP_HIDDEN_UNITS: batch_size,
                        HP_EMBEDDING_DIM: embedding_dim,
                        HP_LEARNING_RATE: learning_rate,
                        HP_DECAY_STEPS_MULTIPLIER: decay_steps_multiplier
                    }
                    # 1-based position of the model about to be loaded.
                    model_position = len(list_models)+1
                    print("{0}/{1}".format(model_position, total_models))
                    model_object=import_trained_keras_model("import custom trained model", "on", model_method_creation, hparams)
                    df_object=create_df_scoring_table("import custom trained model", "on", "{0}-{1}".format(saved_model_name, model_position), hparams, model_object)
                    list_models.append(model_object)
                    list_df.append(df_object)

else:
    # Size of the full grid, shown as the denominator of the progress print-out.
    total_models = (len(HP_HIDDEN_UNITS.domain.values)
                    * len(HP_EMBEDDING_DIM.domain.values)
                    * len(HP_LEARNING_RATE.domain.values))

    for batch_size in HP_HIDDEN_UNITS.domain.values:
        for embedding_dim in HP_EMBEDDING_DIM.domain.values:
            for learning_rate in HP_LEARNING_RATE.domain.values:
                hparams = {
                    HP_HIDDEN_UNITS: batch_size,
                    HP_EMBEDDING_DIM: embedding_dim,
                    HP_LEARNING_RATE: learning_rate
                }
                # 1-based position of the model about to be loaded.
                model_position = len(list_models)+1
                print("{0}/{1}".format(model_position, total_models))
                model_object=import_trained_keras_model("import custom trained model", "off", model_method_creation, hparams)
                df_object=create_df_scoring_table("import custom trained model", "off", "{0}-{1}".format(saved_model_name, model_position), hparams, model_object)
                list_models.append(model_object)
                list_df.append(df_object)

In [None]:
# Give each of the 27 loaded RMSprop models its own name, following the grid-search order.
(model_one, model_two, model_three, model_four,
 model_five, model_six, model_seven, model_eight,
 model_nine, model_ten, model_eleven, model_twelve) = list_models[0:12]
(model_thirteen, model_fourteen, model_fifteen, model_sixteen,
 model_seventeen, model_eighteen, model_nineteen, model_twenty,
 model_twenty_one, model_twenty_two, model_twenty_three, model_twenty_four) = list_models[12:24]
(model_twenty_five, model_twenty_six, model_twenty_seven) = list_models[24:27]

# Same naming scheme for the per-model scoring tables.
(df_scores_one, df_scores_two, df_scores_three, df_scores_four,
 df_scores_five, df_scores_six, df_scores_seven, df_scores_eight,
 df_scores_nine, df_scores_ten, df_scores_eleven, df_scores_twelve) = list_df[0:12]
(df_scores_thirteen, df_scores_fourteen, df_scores_fifteen, df_scores_sixteen,
 df_scores_seventeen, df_scores_eighteen, df_scores_nineteen, df_scores_twenty,
 df_scores_twenty_one, df_scores_twenty_two, df_scores_twenty_three, df_scores_twenty_four) = list_df[12:24]
(df_scores_twenty_five, df_scores_twenty_six, df_scores_twenty_seven) = list_df[24:27]

# The 27 scoring tables in order (the unpacking above already requires list_df to hold at least 27).
frames_rmsprop = list_df[0:27]

# One row block per model, re-indexed from 0.
result_rmsprop = pd.concat(frames_rmsprop).reset_index(drop=True)
# Displayed sorted by the two losses; result_rmsprop itself keeps the grid-search order.
result_rmsprop.sort_values(by=['Hamming Loss', 'Zero_one Loss'])

In [None]:
# Print the RMSprop results table as LaTeX source, omitting the row index.
print(result_rmsprop.to_latex(index=False))

In [None]:
# Persist the RMSprop results table; the file name records the hyper-parameters of the best RMSprop estimator.
results_rmsprop_file = "{0}\\{1}_{2}dim_{3}batchsize_{4}lr_{5}.pkl".format(
    folder_path_model_saved,
    "results_table_rmsprop",
    str(150),    # embedding size of the best model estimator
    str(64),     # batch size of the best model estimator
    str(0.001),  # learning rate of the best model estimator
    version_data_control)

result_rmsprop.to_pickle(os.path.join(os.getcwd(), results_rmsprop_file))

The best of the 27 models presented above is model 16, with:
* Embedding size: 150
* Batch size: 64
* Learning rate: 0.001
* Hamming loss & Zero-one loss: 0.004064 - 637.0

# <b>- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  </b>

The three final best models, one per optimizer, are:

**Adam**
* Model 7
* Embedding size: 100
* Batch size: 32
* Learning rate: 0.001
* Decay Steps Multiplier: 10
* Hamming loss & Zero-one loss: 0.003620 - 566.0

**SGD**
* Model 15
* Embedding size: 100
* Batch size: 64
* Learning rate: 0.1
* Hamming loss & Zero-one loss: 0.009035 - 1363.0

**RMSprop**
* Model 16
* Embedding size: 150
* Batch size: 64
* Learning rate: 0.001
* Hamming loss & Zero-one loss: 0.004064 - 637.0

# <b>- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  </b>

**Comparison 2: Create a classification report and a confusion matrix for the two closest models (per optimizer)** <br>
Additionally, create a bias-variance tradeoff table per optimizer.

In [None]:
# Best model selected-Adam

# Single-valued domains: each one pins a hyper-parameter of the selected Adam model.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([100]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001]))
HP_DECAY_STEPS_MULTIPLIER = hp.HParam('decay_steps_multiplier', hp.Discrete([10]))

# Every domain holds exactly one value, so one-element unpacking replaces the nested loops.
(batch_size,) = HP_HIDDEN_UNITS.domain.values
(embedding_dim,) = HP_EMBEDDING_DIM.domain.values
(learning_rate,) = HP_LEARNING_RATE.domain.values
(decay_steps_multiplier,) = HP_DECAY_STEPS_MULTIPLIER.domain.values

hparams_adam = {
    HP_HIDDEN_UNITS: batch_size,
    HP_EMBEDDING_DIM: embedding_dim,
    HP_LEARNING_RATE: learning_rate,
    HP_DECAY_STEPS_MULTIPLIER: decay_steps_multiplier
}

# Set before loading so the folder global points at the Adam runs.
folder_path_model_saved="model_one\\adam_v2_models_20072020"
model_one = import_trained_keras_model("import custom trained model", "on", "adam", hparams_adam)

In [None]:
# Best model selected-SGD

# Single-valued domains: each one pins a hyper-parameter of the selected SGD model.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([64]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([100]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.1]))

# Every domain holds exactly one value, so one-element unpacking replaces the nested loops.
(batch_size,) = HP_HIDDEN_UNITS.domain.values
(embedding_dim,) = HP_EMBEDDING_DIM.domain.values
(learning_rate,) = HP_LEARNING_RATE.domain.values

hparams_sgd = {
    HP_HIDDEN_UNITS: batch_size,
    HP_EMBEDDING_DIM: embedding_dim,
    HP_LEARNING_RATE: learning_rate
}

# Set before loading so the folder global points at the SGD runs.
folder_path_model_saved="model_one\\sgd_models_20072020"
model_two = import_trained_keras_model("import custom trained model", "off", "sgd", hparams_sgd)

In [None]:
# Best model selected-RMSprop

# Single-valued domains: each one pins a hyper-parameter of the selected RMSprop model.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([64]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([150]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001]))

# Every domain holds exactly one value, so one-element unpacking replaces the nested loops.
(batch_size,) = HP_HIDDEN_UNITS.domain.values
(embedding_dim,) = HP_EMBEDDING_DIM.domain.values
(learning_rate,) = HP_LEARNING_RATE.domain.values

# NOTE: this cell rebinds the generic name `hparams` (the sibling cells use hparams_adam / hparams_sgd).
hparams = {
    HP_HIDDEN_UNITS: batch_size,
    HP_EMBEDDING_DIM: embedding_dim,
    HP_LEARNING_RATE: learning_rate
}

# Set before loading so the folder global points at the RMSprop runs.
folder_path_model_saved="model_one\\rmsprop_models_20072020"
model_three = import_trained_keras_model("import custom trained model", "off", "rmsprop", hparams)

In [None]:
# Date stamp of the model folders; the file names themselves carry the 16072020 stamp.
saved_version_control="20072020"

# Training history of the selected Adam model (100 dim, batch 32, lr 0.001, decay multiplier 10).
adam_history_file = "model_one\\adam_v2_models_{0}\\metrics_histogram_multi_input_keras_{1}dim_{2}batchsize_{3}lr_{4}decaymultiplier_16072020.pkl".format(saved_version_control, str(100), str(32), str(0.001), str(10))
history_dataframe_one=pd.read_pickle(os.path.join(os.getcwd(), adam_history_file))

# Training history of the selected SGD model (100 dim, batch 64, lr 0.1).
sgd_history_file = "model_one\\sgd_models_{0}\\metrics_histogram_multi_input_keras_{1}dim_{2}batchsize_{3}lr_16072020.pkl".format(saved_version_control, str(100), str(64), str(0.1))
history_dataframe_two=pd.read_pickle(os.path.join(os.getcwd(), sgd_history_file))

# Training history of the selected RMSprop model (150 dim, batch 64, lr 0.001).
rmsprop_history_file = "model_one\\rmsprop_models_{0}\\metrics_histogram_multi_input_keras_{1}dim_{2}batchsize_{3}lr_16072020.pkl".format(saved_version_control, str(150), str(64), str(0.001))
history_dataframe_three=pd.read_pickle(os.path.join(os.getcwd(), rmsprop_history_file))

In [None]:
def create_classification_table(model):
    """Score ``model`` on the test set and summarise its thresholded predictions.

    The five test inputs (actors, plot, features, reviews, title) and the
    ground truth ``y_test`` are read from notebook globals.

    Parameters
    ----------
    model : object exposing ``predict(list_of_inputs)``.

    Returns
    -------
    tuple
        ``(classification_table, variance, bias)`` where ``classification_table``
        is the output of ``classification_report``, ``variance`` is ``np.var`` of
        the 0/1 predictions and ``bias`` is the mean squared gap between the
        overall mean prediction and ``y_test``, minus that variance.

    NOTE(review): bias/variance come from a single set of predictions, not from
    repeated re-training - confirm this is the intended estimate.
    """
    test_inputs = [X_test_seq_actors, X_test_seq_plot, X_test_seq_features, X_test_seq_reviews, X_test_seq_title]

    # A label counts as predicted when its probability is strictly above 0.5.
    predicted_labels = (model.predict(test_inputs) > 0.5).astype(int)

    variance = np.var(predicted_labels)
    mean_prediction = np.mean(predicted_labels)
    sse = np.mean((mean_prediction - y_test)**2)
    bias = sse - variance

    report = classification_report(y_true=y_test, y_pred=predicted_labels)

    return report, variance, bias

def _confusion_matrix_file_name(mode, decay_steps_mode, embedding_dim_mode, hparams):
    """Return the relative pickle path create_confusion_matrix uses for this configuration."""
    custom_model = mode == "custom trained model"
    # The embedding size is part of the name only for custom models, and there only when
    # decay steps are off or the embedding mode is "on".
    include_embedding = custom_model and (decay_steps_mode != "on" or embedding_dim_mode == "on")
    # The decay multiplier is left out only for custom models trained without decay steps.
    include_decay = (not custom_model) or decay_steps_mode == "on"

    # Hyper-parameters are looked up lazily so hparams may omit the keys a name does not use.
    name_parts = ["confusion_matrix"]
    if include_embedding:
        name_parts.append(str(hparams[HP_EMBEDDING_DIM]) + "dim")
    name_parts.append(str(hparams[HP_HIDDEN_UNITS]) + "batchsize")
    name_parts.append(str(hparams[HP_LEARNING_RATE]) + "lr")
    if include_decay:
        name_parts.append(str(hparams[HP_DECAY_STEPS_MULTIPLIER]) + "decaymultiplier")
    name_parts.append(str(version_data_control))

    return "{0}\\{1}.pkl".format(folder_path_model_saved, "_".join(name_parts))


def create_confusion_matrix(mode, decay_steps_mode, embedding_dim_mode,  model, hparams):
    """Build, pickle and return the genre confusion matrix of ``model`` on the test set.

    Test inputs, ground truth, ``genres_list``, ``folder_path_model_saved`` and
    ``version_data_control`` are read from notebook globals.

    Parameters
    ----------
    mode : str
        ``"custom trained model"`` uses the five tokenised test inputs and ``y_test``;
        any other value uses the four ``test_bytes_list_*`` inputs and ``test_label``.
    decay_steps_mode : str
        ``"on"`` adds the decay multiplier to the file name (custom models only;
        other modes always add it).
    embedding_dim_mode : str
        For custom models with decay ``"on"``, ``"on"`` adds the embedding size
        to the file name.
    model : object exposing ``predict(list_of_inputs)``.
    hparams : dict
        Keyed by the ``HP_*`` globals; only the keys needed for the file name are read.

    Returns
    -------
    pandas.DataFrame
        Square matrix with ``genres_list`` as both index and columns.

    NOTE(review): both the truth and the 0/1 predictions are collapsed with
    ``argmax``, so a multi-label row counts only for its first active label and a
    row without any predicted label counts as the first genre - confirm intended.
    """
    if mode == "custom trained model":
        test_inputs = [X_test_seq_actors, X_test_seq_plot, X_test_seq_features, X_test_seq_reviews, X_test_seq_title]
        true_labels = y_test
    else:
        test_inputs = [test_bytes_list_plot, test_bytes_list_features, test_bytes_list_reviews, test_bytes_list_title]
        true_labels = test_label

    y_test_pred_probs = model.predict(test_inputs)
    y_test_predictions = (y_test_pred_probs>0.5).astype(int)

    # Passing every label index keeps the matrix at len(genres_list) x len(genres_list)
    # even when some genre never shows up as an argmax; without it the DataFrame
    # construction below fails with a shape mismatch.
    conf_mat=confusion_matrix(true_labels.argmax(axis=1),
                              y_test_predictions.argmax(axis=1),
                              labels=list(range(len(genres_list))))

    conf_matrix=pd.DataFrame(conf_mat,
                             columns=genres_list,
                             index=genres_list)

    conf_matrix.to_pickle(os.path.join(os.getcwd(),
                                       _confusion_matrix_file_name(mode, decay_steps_mode, embedding_dim_mode, hparams)))
    return conf_matrix

In [None]:
#ADAM Optimizer

folder_path_model_saved="model_one\\adam_v2_models_20072020"

# Classification report plus the variance/bias figures of the selected Adam model.
classification_table_one, variance_adam, bias_adam=create_classification_table(model_one)

report_header_adam = "Classification report for the best model estimator of the Adam optimizaion function:\n\n"
print(report_header_adam + str(classification_table_one))

# One-row summary; every value is stored as a string.
adam_tradeoff_columns = {
    'Tag Name': pd.Series("model seven adam", dtype='str'),
    'Bias': pd.Series(bias_adam, dtype='str'),
    'Variance': pd.Series(variance_adam, dtype='str'),
    'Average Training loss': pd.Series(np.mean(history_dataframe_one.loss), dtype='str'),
    'Average Validation loss': pd.Series(np.mean(history_dataframe_one.val_loss), dtype='str'),
}
bias_variance_tradeoff_adam=pd.DataFrame(adam_tradeoff_columns)

In [None]:
#ADAM Optimizer

# Single-valued domains: each one pins a hyper-parameter of the selected Adam model.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([100]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001]))
HP_DECAY_STEPS_MULTIPLIER = hp.HParam('decay_steps_multiplier', hp.Discrete([10]))

# Every domain holds exactly one value, so one-element unpacking replaces the nested loops.
(batch_size,) = HP_HIDDEN_UNITS.domain.values
(embedding_dim,) = HP_EMBEDDING_DIM.domain.values
(learning_rate,) = HP_LEARNING_RATE.domain.values
(decay_steps_multiplier,) = HP_DECAY_STEPS_MULTIPLIER.domain.values

hparams_adam = {
    HP_HIDDEN_UNITS: batch_size,
    HP_EMBEDDING_DIM: embedding_dim,
    HP_LEARNING_RATE: learning_rate,
    HP_DECAY_STEPS_MULTIPLIER: decay_steps_multiplier
}

# The confusion matrix is pickled into this folder by create_confusion_matrix.
folder_path_model_saved="model_one\\adam_v2_models_20072020"
confusion_matrix_one=create_confusion_matrix("custom trained model", "on", "on", model_one, hparams_adam)
confusion_matrix_one

In [None]:
# Print the Adam confusion matrix as LaTeX source, keeping the genre row labels.
print(confusion_matrix_one.to_latex(index=True))

#### ---------------------------------------------------------------------------

In [None]:
#SGD Optimizer

folder_path_model_saved="model_one\\sgd_models_20072020"

# Classification report plus the variance/bias figures of the selected SGD model.
classification_table_two, variance_sgd, bias_sgd=create_classification_table(model_two)

report_header_sgd = "Classification report for the best model estimator of the SGD optimizaion function:\n\n"
print(report_header_sgd + str(classification_table_two))

# One-row summary; every value is stored as a string.
sgd_tradeoff_columns = {
    'Tag Name': pd.Series("model fifteen sgd", dtype='str'),
    'Bias': pd.Series(bias_sgd, dtype='str'),
    'Variance': pd.Series(variance_sgd, dtype='str'),
    'Average Training loss': pd.Series(np.mean(history_dataframe_two.loss), dtype='str'),
    'Average Validation loss': pd.Series(np.mean(history_dataframe_two.val_loss), dtype='str'),
}
bias_variance_tradeoff_sgd=pd.DataFrame(sgd_tradeoff_columns)

In [None]:
#SGD Optimizer

# Single-valued domains: each one pins a hyper-parameter of the selected SGD model.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([64]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([100]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.1]))

# Every domain holds exactly one value, so one-element unpacking replaces the nested loops.
(batch_size,) = HP_HIDDEN_UNITS.domain.values
(embedding_dim,) = HP_EMBEDDING_DIM.domain.values
(learning_rate,) = HP_LEARNING_RATE.domain.values

hparams_sgd = {
    HP_HIDDEN_UNITS: batch_size,
    HP_EMBEDDING_DIM: embedding_dim,
    HP_LEARNING_RATE: learning_rate
}

# The confusion matrix is pickled into this folder by create_confusion_matrix.
folder_path_model_saved="model_one\\sgd_models_20072020"
confusion_matrix_two=create_confusion_matrix("custom trained model", "off", "on", model_two, hparams_sgd)
confusion_matrix_two

In [None]:
# Print the SGD confusion matrix as LaTeX source, keeping the genre row labels.
print(confusion_matrix_two.to_latex(index=True))

#### ---------------------------------------------------------------------------

In [None]:
#RMSprop Optimizer

folder_path_model_saved="model_one\\rmsprop_models_20072020"

# Classification report plus the variance/bias figures of the selected RMSprop model.
classification_table_three, variance_rmsprop, bias_rmsprop=create_classification_table(model_three)
# The header used to say "SGD" (copy-paste from the SGD cell); this report belongs to RMSprop.
print("Classification report for the best model estimator of the RMSprop optimizaion function:\n\n" + str(classification_table_three))

# One-row summary; every value is stored as a string.
bias_variance_tradeoff_rmsprop=pd.DataFrame({'Tag Name':pd.Series("model sixteen rmsprop", dtype='str'),
                                             'Bias': pd.Series(bias_rmsprop, dtype='str'),
                                             'Variance': pd.Series(variance_rmsprop, dtype='str'),
                                             'Average Training loss': pd.Series(np.mean(history_dataframe_three.loss), dtype='str'),
                                             'Average Validation loss': pd.Series(np.mean(history_dataframe_three.val_loss), dtype='str')})

In [None]:
#RMSprop Optimizer

# Single-valued domains: each one pins a hyper-parameter of the selected RMSprop model.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([64]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([150]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001]))

# Every domain holds exactly one value, so one-element unpacking replaces the nested loops.
(batch_size,) = HP_HIDDEN_UNITS.domain.values
(embedding_dim,) = HP_EMBEDDING_DIM.domain.values
(learning_rate,) = HP_LEARNING_RATE.domain.values

hparams_rmsprop = {
    HP_HIDDEN_UNITS: batch_size,
    HP_EMBEDDING_DIM: embedding_dim,
    HP_LEARNING_RATE: learning_rate
}

# The confusion matrix is pickled into this folder by create_confusion_matrix.
folder_path_model_saved="model_one\\rmsprop_models_20072020"
confusion_matrix_three=create_confusion_matrix("custom trained model", "off", "on", model_three, hparams_rmsprop)
confusion_matrix_three

In [None]:
# Print the RMSprop confusion matrix as LaTeX source, keeping the genre row labels.
print(confusion_matrix_three.to_latex(index=True))

In [None]:
# One row per optimizer, in the order Adam, SGD, RMSprop.
bias_variance_frames = [
    bias_variance_tradeoff_adam,
    bias_variance_tradeoff_sgd,
    bias_variance_tradeoff_rmsprop,
]

# Stack the three one-row tables and renumber the rows from 0.
bias_variance_result = pd.concat(bias_variance_frames).reset_index(drop=True)
bias_variance_result

In [None]:
# Print the bias-variance summary as LaTeX source, including the row index.
print(bias_variance_result.to_latex(index=True))

**Comparison 3: Test Accuracy - Test Score/Loss**

In [None]:
# Load the stored metric tables of the three selected models (Adam, SGD, RMSprop) and stack them.

working_dir = os.getcwd()

df_score_one=pd.read_pickle(os.path.join(working_dir, "model_one\\adam_v2_models_20072020\\df_metrics_multi_input_keras_model_100dim_32batchsize_0.001lr_10decaymultiplier_16072020.pkl"))
df_score_two=pd.read_pickle(os.path.join(working_dir, "model_one\\sgd_models_20072020\\df_metrics_multi_input_keras_model_100dim_64batchsize_0.1lr_16072020.pkl"))
df_score_three=pd.read_pickle(os.path.join(working_dir, "model_one\\rmsprop_models_20072020\\df_metrics_multi_input_keras_model_150dim_64batchsize_0.001lr_16072020.pkl"))

# Rows are renumbered from 0 after stacking.
result=pd.concat([df_score_one, df_score_two, df_score_three]).reset_index(drop=True)
result.head()

In [None]:
# Print the combined metric table as LaTeX source, including the row index.
print(result.to_latex(index=True))

In [None]:
# Marker colours: the model(s) reaching the lowest value are drawn in red, the rest in black.
colormin = 'red'
colorother = 'black'

lowest_hamming_loss = result['Test Hamming Loss'].min()
lowest_test_loss = result['Test Loss'].min()

# NOTE: clrs_acc colours the Hamming-loss chart despite its name.
clrs_acc = [colormin if value == lowest_hamming_loss else colorother for value in result['Test Hamming Loss']]
clrs_loss = [colormin if value == lowest_test_loss else colorother for value in result['Test Loss']]

# Chart 1: Hamming loss on the test set, one marker per model tag.
x=result['Tag Name'].values.tolist()
y=result['Test Hamming Loss'].values.tolist()
hamming_trace = go.Scatter(x=x, y=y,
                           mode='markers',
                           marker=dict(color=clrs_acc))
fig5 = go.Figure()
fig5.add_trace(hamming_trace)
fig5.update_layout(title="Hamming Loss on test set per model",
                   xaxis_title="Model number",
                   yaxis_title="Hamming Loss value/model")
fig5.show()

#--------------------------------------------------

# Chart 2: loss score on the test set, one marker per model tag.
loss_trace = go.Scatter(x=result['Tag Name'].values.tolist(),
                        y=result['Test Loss'].values.tolist(),
                        mode='markers',
                        marker=dict(color=clrs_loss))
fig6 = go.Figure()
fig6.add_trace(loss_trace)

fig6.update_layout(title="Loss score on test set per model",
                   xaxis_title="Model number",
                   yaxis_title="Test loss/model")
fig6.show()

#### Comparison 4: Predicted vs Actual Genre Tags

In [None]:
# Test-split dataframe; the functions below read its 'title' and 'reduced_genres' columns.
X_test=pd.read_pickle(os.path.join(os.getcwd(), "80-20 split_non-balanced\\text_tokenization_padded_sequences_13072020\\x_test_13072020.pkl"))

In [None]:
def predict_genre_tags(indx, model, genres_list):
    """Return the predicted genre tags of the test movie at position ``indx``.

    The movie's five input sequences (actors, plot, features, reviews, title)
    and the ``X_test`` dataframe are read from notebook globals. As many tags
    are returned as the movie has entries in ``X_test['reduced_genres']``.

    Parameters
    ----------
    indx : int
        Position of the movie in the test inputs and in ``X_test``.
    model : object exposing ``predict(list_of_inputs)``.
    genres_list : sequence
        Tag names, indexed by the model's output positions.

    Returns
    -------
    list
        The highest-scoring tags, ordered as they appear in ``genres_list``.

    NOTE(review): assumes each 'reduced_genres' entry is list-like; if it is
    stored as a string, ``len`` would count characters - confirm.
    """
    # Slicing indx:indx+1 (instead of indexing) passes a one-movie batch for each input.
    single_movie_inputs = [X_test_seq_actors[indx:indx+1],
                           X_test_seq_plot[indx:indx+1],
                           X_test_seq_features[indx:indx+1],
                           X_test_seq_reviews[indx:indx+1],
                           X_test_seq_title[indx:indx+1]]

    tag_scores = model.predict(single_movie_inputs)[0]

    genres_length = len(X_test['reduced_genres'].iloc[indx])

    # Positions of the highest scores, then sorted ascending so the tags follow genres_list order.
    top_indexes = np.sort(np.argsort(tag_scores)[::-1][:genres_length])

    return [genres_list[i] for i in top_indexes]

def create_predictions_df(model, random_numbers_list, file_name, optimizer_name,  hparams):
    """Build, pickle and return a table of predicted vs. real genre tags.

    One row is produced per entry of ``random_numbers_list`` with the columns
    'Movie Title', 'Predicted Genre tags (top 3)' and 'Real Genre tags'. The
    rows are numbered from 1. Despite the column title, the number of predicted
    tags per movie is whatever ``predict_genre_tags`` returns.

    Parameters
    ----------
    model : object
        Model handed to ``predict_genre_tags``.
    random_numbers_list : sequence of int
        Positions of the movies in ``X_test``.
    file_name : str
        Sub-folder of ``model_one`` in which the pickle is written; the folder
        must already exist.
    optimizer_name : str
        Prefix of the pickle name. For "adam" the decay-steps multiplier is
        part of the name as well.
    hparams : dict
        Maps ``HP_EMBEDDING_DIM``, ``HP_HIDDEN_UNITS``, ``HP_LEARNING_RATE``
        (and ``HP_DECAY_STEPS_MULTIPLIER`` for "adam") to the values used in
        the file name.

    Returns
    -------
    pandas.DataFrame
        The table that was written to disk.

    Notes
    -----
    Reads the notebook-level globals ``X_test``, ``genres_list``,
    ``version_data_control`` and the ``HP_*`` keys.
    """
    columns = ['Movie Title', 'Predicted Genre tags (top 3)', 'Real Genre tags']

    # Collect all rows first and build the frame once; growing a frame row by row
    # is quadratic and DataFrame.append no longer exists in recent pandas.
    records = [{'Movie Title': X_test['title'].iloc[position],
                'Predicted Genre tags (top 3)': predict_genre_tags(position, model, genres_list),
                'Real Genre tags': X_test['reduced_genres'].iloc[position]}
               for position in random_numbers_list]

    # Index 1..n matches the numbering the table had before (it is printed with
    # index=True further down the notebook).
    df_predictions = pd.DataFrame(records,
                                  columns=columns,
                                  index=pd.RangeIndex(1, len(records) + 1))

    file_stem = "{0}_df_predictions_{1}dim_{2}batchsize_{3}lr".format(optimizer_name,
                                                                      str(hparams[HP_EMBEDDING_DIM]),
                                                                      str(hparams[HP_HIDDEN_UNITS]),
                                                                      str(hparams[HP_LEARNING_RATE]))
    if optimizer_name=="adam":
        # "decatmultiplier" (sic) is kept as is so that files written by earlier
        # runs keep the same name.
        file_stem += "_{0}decatmultiplier".format(str(hparams[HP_DECAY_STEPS_MULTIPLIER]))

    # os.path.join keeps the location valid on Windows and on Linux/Colab.
    df_predictions.to_pickle(os.path.join("model_one",
                                          file_name,
                                          "{0}_{1}.pkl".format(file_stem, version_data_control)))
    return df_predictions

In [None]:
# Draw 20 distinct row positions from the test set (the sampled range starts at 1,
# so position 0 is never drawn) and keep the list under both names.
save_index_of_numbers = random.sample(range(1, y_test.shape[0]), 20)
random_numbers = save_index_of_numbers

print("Randomly saved numbers to make predictions: {}".format(save_index_of_numbers))

In [None]:
# Hyperparameter values that identify the Adam model; each domain holds one value.
# Note that HP_HIDDEN_UNITS is registered under the name 'batch_size'.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([100]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001]))
HP_DECAY_STEPS_MULTIPLIER = hp.HParam('decay_steps_multiplier', hp.Discrete([10]))

# Walk every combination of the domains (a single one with the values above) and
# build/pickle the predictions table for model_one on the sampled test movies.
for batch_size in HP_HIDDEN_UNITS.domain.values:
    for embedding_dim in HP_EMBEDDING_DIM.domain.values:
        for learning_rate in HP_LEARNING_RATE.domain.values:
            for decay_steps_multiplier in HP_DECAY_STEPS_MULTIPLIER.domain.values:
                hparams_adam = {
                    HP_HIDDEN_UNITS: batch_size,
                    HP_EMBEDDING_DIM: embedding_dim,
                    HP_LEARNING_RATE: learning_rate,
                    HP_DECAY_STEPS_MULTIPLIER: decay_steps_multiplier
                  }
                predictions_dataframe_one=create_predictions_df(model_one, save_index_of_numbers, "adam_v2_models_20072020", "adam", hparams_adam)
# Last expression of the cell: display the table of the last combination.
predictions_dataframe_one

In [None]:
# Print the Adam predictions table as LaTeX source, row index included.
print(predictions_dataframe_one.to_latex(index=True))

In [None]:
# Hyperparameter values that identify the SGD model; these rebind the HP_* names
# set in the Adam cell (HP_DECAY_STEPS_MULTIPLIER is left untouched).
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([64]))
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([100]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.1]))

# Build/pickle the predictions table for model_two on the same sampled movies.
for batch_size in HP_HIDDEN_UNITS.domain.values:
    for embedding_dim in HP_EMBEDDING_DIM.domain.values:
        for learning_rate in HP_LEARNING_RATE.domain.values:
            hparams_sgd = {
                HP_HIDDEN_UNITS: batch_size,
                HP_EMBEDDING_DIM: embedding_dim,
                HP_LEARNING_RATE: learning_rate
              }
            predictions_dataframe_two=create_predictions_df(model_two, save_index_of_numbers, "sgd_models_20072020", "sgd", hparams_sgd)
# Last expression of the cell: display the table.
predictions_dataframe_two

In [None]:
# Print the SGD predictions table as LaTeX source, row index included.
print(predictions_dataframe_two.to_latex(index=True))

In [None]:
# Build/pickle the predictions table for model_three (RMSprop).
# NOTE(review): this cell defines no HP_* values of its own; it iterates whatever
# HP_HIDDEN_UNITS / HP_EMBEDDING_DIM / HP_LEARNING_RATE currently hold (the SGD cell
# sets 64 / 100 / 0.1). Confirm these are the RMSprop model's hyperparameters, since
# they end up in the name of the pickle that is written.
for batch_size in HP_HIDDEN_UNITS.domain.values:
    for embedding_dim in HP_EMBEDDING_DIM.domain.values:
        for learning_rate in HP_LEARNING_RATE.domain.values:
            hparams_rmsprop = {
                HP_HIDDEN_UNITS: batch_size,
                HP_EMBEDDING_DIM: embedding_dim,
                HP_LEARNING_RATE: learning_rate
              }
            predictions_dataframe_three=create_predictions_df(model_three, save_index_of_numbers, "rmsprop_models_20072020", "rmsprop", hparams_rmsprop)
# Last expression of the cell: display the table.
predictions_dataframe_three

In [None]:
# Print the RMSprop predictions table as LaTeX source, row index included.
print(predictions_dataframe_three.to_latex(index=True))

#### Comparison 5: Training and Validation plots

Before creating the learning curves for each of the three best model estimators, some examples of underfitting, overfitting and good-fit learning curves are presented using made-up (non-experimental) data.

In [None]:
# Example 1 - "Underfitting" learning curve, part 1 (hand-written values, not a real run):
# the training loss keeps shrinking while the validation loss slowly rises.
fig1 = go.Figure(data=[
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[0.005, 0.003, 0.001, 0.0009, 0.0007,  0.0005],
               mode='lines+markers',
               name="Train",
               line=dict(color='rgb(102, 194, 165)')),
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[0.05, 0.055, 0.065, 0.068, 0.07, 0.075],
               mode='lines+markers',
               name="Validation",
               line=dict(color='rgb(252, 141, 98)')),
])

fig1.update_layout(
    template="simple_white",
    title="Loss - Underfitting learning curve",
    xaxis_title="Number of epochs",
    yaxis_title="Loss/epoch",
)
fig1.show()

In [None]:
# Example 2 - "Underfitting" learning curve, part 2 (hand-written values, not a real run):
# both losses stay above 1.0 and barely decrease over the epochs.
fig1 = go.Figure(data=[
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[1.075, 1.071, 1.066, 1.061, 1.057, 1.037],
               mode='lines+markers',
               name="Train",
               line=dict(color='rgb(102, 194, 165)')),
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[1.065, 1.061, 1.056, 1.051, 1.047, 1.027],
               mode='lines+markers',
               name="Validation",
               line=dict(color='rgb(252, 141, 98)')),
])

fig1.update_layout(
    template="simple_white",
    title="Loss - Underfitting learning curve",
    xaxis_title="Number of epochs",
    yaxis_title="Loss/epoch",
)
fig1.show()

In [None]:
# Example 3 - Overfitting learning curve (hand-written values, not a real run):
# the training loss keeps falling while the validation loss turns upwards again
# after the 60th epoch.
fig1 = go.Figure(data=[
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[1.075, 0.85, 0.45, 0.35, 0.25, 0.05],
               mode='lines+markers',
               name="Train",
               line=dict(color='rgb(102, 194, 165)')),
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[1.005, 0.75, 0.51, 0.28, 0.38, 0.49],
               mode='lines+markers',
               name="Validation",
               line=dict(color='rgb(252, 141, 98)')),
])

fig1.update_layout(
    template="simple_white",
    title="Loss - Overfitting learning curve",
    xaxis_title="Number of epochs",
    yaxis_title="Loss/epoch",
)
fig1.show()

In [None]:
# Example 4 - Good-fit learning curves (hand-written values, not a real run):
# training and validation loss fall together and stay close to each other.
fig1 = go.Figure(data=[
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[1.075, 0.85, 0.45, 0.35, 0.25, 0.05],
               mode='lines+markers',
               name="Train",
               line=dict(color='rgb(102, 194, 165)')),
    go.Scatter(x=[0, 20, 40, 60, 80, 100],
               y=[1.069, 0.82, 0.43, 0.32, 0.22, 0.03],
               mode='lines+markers',
               name="Validation",
               line=dict(color='rgb(252, 141, 98)')),
])

fig1.update_layout(
    template="simple_white",
    title="Loss - Good fit learning curve",
    xaxis_title="Number of epochs",
    yaxis_title="Loss/epoch",
)
fig1.show()

Start of the pre-final step of the selection plan | Training-Validation Accuracy/Loss Learning Curves

**Hamming Loss performance models**

In [None]:
colormin = 'black'
colormax = 'black'
colorother = 'rgb(252, 141, 98)'


def _plot_hamming_loss_curves(history, optimizer_label):
    """Plot training and validation Hamming loss per epoch and mark the best epoch.

    Parameters
    ----------
    history : pandas.DataFrame
        Training history with the columns ``epoch``, ``hamming_loss`` and
        ``val_hamming_loss``.
    optimizer_label : str
        Optimizer name inserted into the figure title.

    Returns
    -------
    tuple
        ``(figure, marker_colors)``: the displayed figure and the per-epoch
        marker colours of the validation trace.
    """
    lowest_val_hamming_loss = history.val_hamming_loss.min()

    # The epoch(s) reaching the minimum get `colormax`, all others `colorother`.
    marker_colors = [colormax if value == lowest_val_hamming_loss else colorother
                     for value in history.val_hamming_loss]

    # First row of the history at which the minimum is reached.
    best_epoch = history.epoch[history.val_hamming_loss == lowest_val_hamming_loss].tolist()[0]

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=history.epoch.tolist(),
                             y=history.hamming_loss.tolist(),
                             mode='lines+markers',
                             name='Training Hamming Loss',
                             line=dict(color='rgb(102, 194, 165)')))

    fig.add_trace(go.Scatter(x=history.epoch.tolist(),
                             y=history.val_hamming_loss.tolist(),
                             mode='lines+markers',
                             name='Validation Hamming Loss',
                             line=dict(color='rgb(252, 141, 98)'),
                             marker=dict(color=marker_colors)))

    fig.update_layout(template="simple_white",
                      title="Hamming Loss score on train & validation sets (Model estimator of the {0} Optimizer)".format(optimizer_label),
                      xaxis_title="Number of epochs",
                      yaxis_title="Hamming Loss/epoch")

    # The arrow points at the minimum of the validation curve, so the label says
    # "lowest" (for a loss, lower is better).
    fig.update_layout(showlegend=True,
                      annotations=[dict(x=best_epoch,
                                        y=lowest_val_hamming_loss,
                                        xref="x",yref="y",
                                        text="Epoch with the lowest validation Hamming Loss",
                                        showarrow=True,
                                        arrowhead=5,
                                        ax=0,ay=40)])

    fig.update_layout(legend_title_text='Training & Validation Hamming Loss points per epoch')

    fig.show()
    return fig, marker_colors


#Hamming Loss of Adam optimizer model
fig1, clrs_acc_model_adam = _plot_hamming_loss_curves(history_dataframe_one, "Adam")

#Hamming Loss of SGD optimizer model
fig2, clrs_acc_model_sgd = _plot_hamming_loss_curves(history_dataframe_two, "SGD")

#Hamming Loss of RMSprop optimizer model
fig3, clrs_acc_model_rmsprop = _plot_hamming_loss_curves(history_dataframe_three, "RMSprop")

In [None]:
colormin = 'black'
colorother = 'rgb(252, 141, 98)'


def _draw_loss_curves(history, optimizer_label, arrow_ay):
    """Plot training and validation loss per epoch and mark the lowest validation loss.

    ``history`` is a DataFrame with the columns ``epoch``, ``loss`` and
    ``val_loss``; ``optimizer_label`` is inserted into the title and
    ``arrow_ay`` is the vertical offset of the annotation arrow.
    Returns ``(figure, point_colors)``.
    """
    lowest_val_loss = history.val_loss.min()
    point_colors = [colormin if value == lowest_val_loss else colorother
                    for value in history.val_loss]
    epochs = history.epoch.tolist()

    figure = go.Figure()
    figure.add_trace(go.Scatter(x=epochs,
                                y=history.loss.tolist(),
                                mode='lines+markers',
                                name='Training Loss',
                                line=dict(color='rgb(102, 194, 165)')))
    figure.add_trace(go.Scatter(x=epochs,
                                y=history.val_loss.tolist(),
                                mode='lines+markers',
                                name='Validation Loss',
                                line=dict(color='rgb(252, 141, 98)'),
                                marker=dict(color=point_colors)))

    figure.update_layout(template="simple_white",
                         title="Loss score on train & validation sets (Model estimator of the {0} Optimizer)".format(optimizer_label),
                         xaxis_title="Number of epochs",
                         yaxis_title="Loss/epoch")

    # Arrow on the first epoch at which the validation loss is at its minimum.
    figure.update_layout(showlegend=True,
                         annotations=[dict(x=history.epoch[history.val_loss == lowest_val_loss].tolist()[0],
                                           y=lowest_val_loss,
                                           xref="x", yref="y",
                                           text="Epoch with the lowest validation loss",
                                           showarrow=True,
                                           arrowhead=5,
                                           ax=0, ay=arrow_ay)])
    figure.show()
    return figure, point_colors


# Loss of the Adam optimizer model
fig3, clrs_loss_model_adam = _draw_loss_curves(history_dataframe_one, "Adam", -40)

# Loss of the SGD optimizer model
fig4, clrs_loss_model_sgd = _draw_loss_curves(history_dataframe_two, "SGD", -65)

# Loss of the RMSprop optimizer model
fig5, clrs_loss_model_rmsprop = _draw_loss_curves(history_dataframe_three, "RMSprop", -65)

In [None]:
def visualize_model(model):
    """Return an inline SVG of the model graph (shapes and layer names shown, dpi 65)."""
    dot_graph = model_to_dot(model, show_shapes=True, show_layer_names=True, dpi=65)
    svg_data = dot_graph.create(prog='dot', format='svg')
    return SVG(svg_data)


visualize_model(model_one)

In [None]:
# Write the architecture diagram of model_one to model.png (top-to-bottom layout,
# with layer shapes and names); the value returned by plot_model is the cell output.
tf.keras.utils.plot_model(
    model_one,
    to_file="model.png",
    show_shapes=True,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
)

<b>- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  </b>

Model estimator trained on binary accuracy performance metric

In [None]:
saved_version_data_control="22042020"

# Folder with the saved input sequences and label arrays. Built with os.path.join so
# the location resolves on Windows and on Linux/Colab alike (a literal backslash
# inside a path string is not a separator on POSIX systems).
_sequences_dir = os.path.join(os.getcwd(),
                              "80-20 split_non-balanced",
                              "20000_max_features_cleaned_numbers")


def _load_split_array(array_name):
    """Load ``<array_name>_80-20_non-balanced_20000_<version>.npy`` from the sequences folder."""
    file_name = "{0}_80-20_non-balanced_20000_{1}.npy".format(array_name, saved_version_data_control)
    return np.load(os.path.join(_sequences_dir, file_name))


X_train_seq_actors = _load_split_array("x_train_seq_actors")
X_train_seq_plot = _load_split_array("x_train_seq_plot")
X_train_seq_features = _load_split_array("x_train_seq_features")
X_train_seq_reviews = _load_split_array("x_train_seq_reviews")

print("X_train data inputs have been loaded!\n")

X_test_seq_actors = _load_split_array("x_test_seq_actors")
X_test_seq_plot = _load_split_array("x_test_seq_plot")
X_test_seq_features = _load_split_array("x_test_seq_features")
X_test_seq_reviews = _load_split_array("x_test_seq_reviews")

print("X_test data inputs have been loaded!\n")

y_train = _load_split_array("y_train")
y_test = _load_split_array("y_test")

print("y_train & y_test have been loaded!\n")

In [None]:
"""
Import the tokenizers of each input, fitted on part 3.1
"""
# Built with os.path.join so the folder resolves on Windows and on Linux/Colab alike.
_tokenizers_dir = os.path.join(os.getcwd(),
                               "80-20 split_non-balanced",
                               "20000_max_features_cleaned_numbers")


def _load_tokenizer(input_name):
    """Unpickle ``<input_name>_tokenizer_20000_<version>.pkl`` from the tokenizers folder."""
    tokenizer_path = os.path.join(_tokenizers_dir,
                                  '{0}_tokenizer_20000_{1}.pkl'.format(input_name, saved_version_data_control))
    # NOTE: unpickling runs code stored in the file - only load files produced by this project.
    with open(tokenizer_path, 'rb') as f:
        return pickle.load(f)


actors_tokenizer = _load_tokenizer("actors")
plot_tokenizer = _load_tokenizer("plot")
features_tokenizer = _load_tokenizer("features")
reviews_tokenizer = _load_tokenizer("reviews")

# Report success only when every vocabulary has the expected size; previously the
# success line was printed even right after the error message.
if all(len(tokenizer.word_index)==20000
       for tokenizer in (actors_tokenizer, plot_tokenizer, features_tokenizer, reviews_tokenizer)):
    print("Tokenizers are loaded successfully!")
else:
    print("ERROR: The vocabulary length for some of the tokenizers, is not equal to 20000. Please verify their lengths by running the following: len(actors_tokenizer.word_index)")

In [None]:
"""
Import the X_train, X_test, y_train & y_test data pickled from dataset part 3.1
"""
saved_version_data_control="22042020"

# Built with os.path.join so the folder resolves on Windows and on Linux/Colab alike.
_pickled_data_dir = os.path.join(os.getcwd(),
                                 "pickled_data_per_part",
                                 "version_{0}".format(saved_version_data_control))

X_train = pd.read_pickle(os.path.join(_pickled_data_dir, "X_train_all_inputs_{0}.pkl".format(saved_version_data_control)))
X_test = pd.read_pickle(os.path.join(_pickled_data_dir, "X_test_all_inputs_{0}.pkl".format(saved_version_data_control)))
y_train = pd.read_pickle(os.path.join(_pickled_data_dir, "y_train_all_inputs_{0}.pkl".format(saved_version_data_control)))
y_test = pd.read_pickle(os.path.join(_pickled_data_dir, "y_test_all_inputs_{0}.pkl".format(saved_version_data_control)))

# Inputs and labels of each split must describe the same number of movies.
assert X_train.shape[0]==y_train.shape[0]
assert X_test.shape[0]==y_test.shape[0]

In [None]:
# Loss and metric names that are later passed to model.compile.
neural_network_parameters = {
    'model_loss': 'binary_crossentropy',
    'model_metric': 'accuracy',
}

# Starts empty; optimizer_adam_v2_accuracy records the schedule settings in it.
optimizer_parameters = {}

# Factor applied to the number of training rows when the steps per epoch are computed.
validation_split_ratio = 0.8

def optimizer_adam_v2_accuracy(batch_size_value):
    """Return an Adam optimizer driven by an inverse-time-decay learning-rate schedule.

    The schedule starts at 0.01 and its decay steps are 1000 times the number of
    steps per epoch, which is derived from the row count of
    ``X_train_seq_features``, ``validation_split_ratio`` and ``batch_size_value``.

    Side effect: the settings are stored in the global ``optimizer_parameters``
    dict under the keys 'steps_per_epoch', 'lr_schedule_learning_rate',
    'lr_schedule_decay_steps', 'lr_schedule_decay_rate' and 'staircase'.
    """
    # NOTE(review): `//` already floors, so np.ceil has no effect here - confirm
    # whether a true division followed by ceil was intended.
    steps_per_epoch = int(np.ceil((X_train_seq_features.shape[0]*validation_split_ratio)//batch_size_value))

    optimizer_parameters.update({
        'steps_per_epoch': steps_per_epoch,
        'lr_schedule_learning_rate': 0.01,
        'lr_schedule_decay_steps': steps_per_epoch*1000,
        'lr_schedule_decay_rate': 1,
        'staircase': False,
    })

    schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
        optimizer_parameters['lr_schedule_learning_rate'],
        decay_steps=optimizer_parameters['lr_schedule_decay_steps'],
        decay_rate=optimizer_parameters['lr_schedule_decay_rate'],
        staircase=optimizer_parameters['staircase'],
    )
    return keras.optimizers.Adam(schedule)

#### Model 16 - 300 embedding dimension & 128 batch size

In [None]:
# The architecture (.json) and the weights (.h5) share the same file stem. The path is
# built with os.path.join so it resolves on Windows and on Linux/Colab alike.
_model_sixteen_stem = os.path.join(
    os.getcwd(),
    'model_one',
    'adam_v2_models_22042020',
    'multi_input_keras_model_{0}dim_{1}batchsize_{2}lr_{3}decaymultiplier_22042020'.format(str(300), str(128), str(0.01), str(1000)))

with open(_model_sixteen_stem + '.json', 'r') as f:
    model_json = json.load(f)

model_sixteen = model_from_json(model_json)

model_sixteen.load_weights(_model_sixteen_stem + '.h5')

# Compile with the same loss/metric names and the Adam schedule for batch size 128.
model_sixteen.compile(optimizer=optimizer_adam_v2_accuracy(128),
                      loss=neural_network_parameters['model_loss'],
                      metrics=[neural_network_parameters['model_metric']])

print(type(model_sixteen))
print("\nModel is loaded successfully")

In [None]:
# Path built with os.path.join so it resolves on Windows and on Linux/Colab alike.
df_scores_adam_accuracy = pd.read_pickle(os.path.join(os.getcwd(),
                                                      "model_one",
                                                      "adam_v2_models_22042020",
                                                      "df_metrics_multy_input_keras_300dim_128batchsize_26052020.pkl"))

# Hand-entered constants, written into every row of the frame.
df_scores_adam_accuracy['Bias'] = -0.00047
df_scores_adam_accuracy['Variance'] = 0.096426
df_scores_adam_accuracy

In [None]:
# Print the scores table as LaTeX source, without the row index.
print(df_scores_adam_accuracy.to_latex(index=False))

In [None]:
# Per-epoch training history of the accuracy-trained Adam model. This rebinds
# history_dataframe_one, so the plots below no longer show the earlier history.
# Path built with os.path.join so it resolves on Windows and on Linux/Colab alike.
history_dataframe_one = pd.read_pickle(os.path.join(os.getcwd(),
                                                    "model_one",
                                                    "adam_v2_models_22042020",
                                                    "metrics_histogram_multi_input_keras_300dim_128batchsize_22042020.pkl"))

In [None]:
colormax = 'black'
# Set here as well: the cell reads `colormin` below and must not depend on another
# cell having been run first.
colormin = 'black'
colorother = 'rgb(252, 141, 98)'

# Highlight the epoch(s) with the highest validation accuracy / lowest validation loss.
clrs_acc_model = [colormax if value == history_dataframe_one.val_accuracy.max() else colorother
                  for value in history_dataframe_one.val_accuracy]

clrs_loss = [colormin if value == history_dataframe_one.val_loss.min() else colorother
             for value in history_dataframe_one.val_loss]

#Accuracy of model sixteen (Adam optimizer, trained on the accuracy metric)
fig11=go.Figure()

fig11.add_trace(go.Scatter(x=history_dataframe_one.epoch.tolist(), 
                          y=history_dataframe_one.accuracy.tolist(),
                          mode='lines+markers',
                          name='Training Accuracy',
                          line=dict(color='rgb(102, 194, 165)')))

fig11.add_trace(go.Scatter(x=history_dataframe_one.epoch.tolist(), 
                          y=history_dataframe_one.val_accuracy.tolist(),
                          mode='lines+markers',
                          name='Validation Accuracy',
                          line=dict(color='rgb(252, 141, 98)'),
                          marker=dict(color=clrs_acc_model)))

fig11.update_layout(template="simple_white",
                   title="Accuracy score on train & validation sets (adam model accuracy)",
                   xaxis_title="Number of epochs",
                   yaxis_title="Accuracy/epoch")

# Arrow on the first epoch at which the validation accuracy peaks.
fig11.update_layout(showlegend=True,
                   annotations=[dict(x=history_dataframe_one.epoch[history_dataframe_one.val_accuracy==history_dataframe_one.val_accuracy.max()].tolist()[0],
                                     y=history_dataframe_one.val_accuracy.max(),
                                     xref="x",yref="y",
                                     text="Epoch with the highest validation accuracy",
                                     showarrow=True,
                                     arrowhead=5,
                                     ax=0,ay=40)])
fig11.show()

In [None]:
colormin = 'black'
colorother = 'rgb(252, 141, 98)'

# One colour per epoch: `colormin` where the validation loss is at its minimum.
clrs_loss_model = [colormin if value == history_dataframe_one.val_loss.min() else colorother
                   for value in history_dataframe_one.val_loss]

# Training vs. validation loss of the accuracy-trained Adam model.
fig12 = go.Figure(data=[
    go.Scatter(x=history_dataframe_one.epoch.tolist(),
               y=history_dataframe_one.loss.tolist(),
               mode='lines+markers',
               name='Training Loss',
               line=dict(color='rgb(102, 194, 165)')),
    go.Scatter(x=history_dataframe_one.epoch.tolist(),
               y=history_dataframe_one.val_loss.tolist(),
               mode='lines+markers',
               name='Validation Loss',
               line=dict(color='rgb(252, 141, 98)'),
               marker=dict(color=clrs_loss_model)),
])

# Titles, legend and an arrow on the first epoch with the lowest validation loss.
fig12.update_layout(
    template="simple_white",
    title="Loss score on train & validation sets (adam model trained on accuracy)",
    xaxis_title="Number of epochs",
    yaxis_title="Loss/epoch",
    showlegend=True,
    annotations=[dict(
        x=history_dataframe_one.epoch[history_dataframe_one.val_loss == history_dataframe_one.val_loss.min()].tolist()[0],
        y=history_dataframe_one.val_loss.min(),
        xref="x", yref="y",
        text="Epoch with the lowest validation loss",
        showarrow=True,
        arrowhead=5,
        ax=0, ay=-40,
    )],
)
fig12.show()