### Part 3.2 - Model 1: Multi-Input Keras neural model (Train the model with the SGD Optimizer)


Set the version data control parameter (to save the outputs of this notebook at their latest date)

In [None]:
# Stamp appended to the output folder and file names produced by this notebook
# (looks like a DDMMYYYY date - confirm).
version_data_control="16072020"

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install those libraries if the notebook is executed on Google Colab

!pip install --quiet git+https://github.com/tensorflow/docs
!pip install --quiet humanfriendly

#### Import standard libraries

In [None]:
import pandas as pd
import numpy as np
import os
import time
import random
import pickle
import json
import shutil
import math

%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from datetime import datetime
from tabulate import tabulate
from packaging import version
from humanfriendly import format_timespan
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
date_format='%Y-%m-%d %H-%M-%S'

#### Import visualization libraries


In [None]:
%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from pylab import rcParams

import pydot
import pydotplus
import graphviz

from IPython.display import SVG

#### Import Tensorflow

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_docs as tfdocs #!pip install git+https://github.com/tensorflow/docs
import tensorflow_docs.plots as tfplots
import tensorflow_docs.modeling as tfmodel

from tensorflow.keras import layers, regularizers, models
from tensorflow.keras import models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import model_to_dot, plot_model
from tensorflow.keras.models import load_model, model_from_json

In [None]:
import multiprocessing as mp
# Number of CPU cores reported for this runtime.
number_cores=mp.cpu_count()
print("Number of available cores: {0}".format(number_cores))
# Use all cores but one for intra-op parallelism.
# NOTE(review): on a single-core runtime this passes 0 - confirm how TensorFlow interprets 0 here.
tf.config.threading.set_intra_op_parallelism_threads(number_cores-1)

#### Import Tensorboard

In [None]:
%load_ext tensorboard
from tensorboard.plugins.hparams import api as hp

#### Clear any logs from previous runs

In [None]:
# TensorBoard / HParams log folder of this notebook, resolved against the current working directory.
logging_directory=os.path.join(os.getcwd(), "model_one/sgd/logs/hparam_tuning")

In [None]:
# Destructive: removes the whole log folder of earlier runs; ignore_errors=True keeps this silent when the folder is absent.
shutil.rmtree(logging_directory, ignore_errors=True)

#### Import Tensorflow Hub

In [None]:
import tensorflow_hub as hub

#### Import Keras

In [None]:
import keras.backend as K
from tensorflow import keras

In [None]:
# NOTE(review): `%tensorflow_version` is a Google Colab magic and TensorFlow has already been imported
# in an earlier cell - confirm this line still has any effect at this point.
%tensorflow_version 2.x

print("TensorFlow version: ", tf.__version__)
# Abort early when the installed TensorFlow major version is below 2.
assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

#### Import the data already tokenized and transformed from Part 3.1

* 80-20 split - Non-balanced data

In [None]:
# Stamp and folder prefix of the Part 3.1 outputs that this notebook reads from Google Drive.
saved_version_data_control="13072020"
tokenization_history_folder="text_tokenization_padded_sequences"

# words_tokenized is indexed further down with keys such as 'actors_tokenized'; its values are
# substituted into the tokenizer and array file names and compared with the tokenizer vocabulary sizes.
# NOTE: pickle.load can execute arbitrary code - only open files you produced yourself.
with open(os.path.join(os.getcwd(), 'drive/My Drive/data/{0}_{1}/words_tokenized_{1}.pkl'.format(tokenization_history_folder, saved_version_data_control)), 'rb') as handle:
    words_tokenized = pickle.load(handle)
words_tokenized

In [None]:
"""
Import the tokenizers of each input
"""
# NOTE: pickle.load can execute arbitrary code - only open tokenizer files you produced yourself.
with open(os.path.join(os.getcwd(), 'drive/My Drive/data/{0}_{1}/actors_tokenizer_{2}_{1}.pkl'.format(tokenization_history_folder, saved_version_data_control, words_tokenized['actors_tokenized'])),'rb') as f:
    actors_tokenizer = pickle.load(f)

with open(os.path.join(os.getcwd(), 'drive/My Drive/data/{0}_{1}/plot_tokenizer_{2}_{1}.pkl'.format(tokenization_history_folder, saved_version_data_control, words_tokenized['plot_words_tokenized'])),'rb') as f:
    plot_tokenizer = pickle.load(f)

with open(os.path.join(os.getcwd(), 'drive/My Drive/data/{0}_{1}/features_tokenizer_{2}_{1}.pkl'.format(tokenization_history_folder, saved_version_data_control, words_tokenized['features_words_tokenized'])),'rb') as f:
    features_tokenizer = pickle.load(f)

with open(os.path.join(os.getcwd(), 'drive/My Drive/data/{0}_{1}/reviews_tokenizer_{2}_{1}.pkl'.format(tokenization_history_folder, saved_version_data_control, words_tokenized['reviews_words_tokenized'])),'rb') as f:
    reviews_tokenizer = pickle.load(f)

with open(os.path.join(os.getcwd(), 'drive/My Drive/data/{0}_{1}/title_tokenizer_{2}_{1}.pkl'.format(tokenization_history_folder, saved_version_data_control, words_tokenized['title_words_tokenized'])),'rb') as f:
    title_tokenizer = pickle.load(f)

# Compare every tokenizer's vocabulary size with the size recorded in words_tokenized.
# A plain comparison is used instead of `assert` so the check also runs under `python -O`
# and reports every mismatching tokenizer instead of stopping at the first one.
tokenizers_by_size_key = {
    'actors_tokenized': actors_tokenizer,
    'plot_words_tokenized': plot_tokenizer,
    'features_words_tokenized': features_tokenizer,
    'reviews_words_tokenized': reviews_tokenizer,
    'title_words_tokenized': title_tokenizer,
}
mismatched_tokenizers = [
    "{0} (expected {1}, found {2})".format(size_key, words_tokenized[size_key], len(tokenizer.word_index))
    for size_key, tokenizer in tokenizers_by_size_key.items()
    if len(tokenizer.word_index) != words_tokenized[size_key]
]

# Only announce success when every vocabulary size matches.
if mismatched_tokenizers:
    print("ERROR: The vocabulary length of some tokenizers does not match words_tokenized: {0}".format("; ".join(mismatched_tokenizers)))
else:
    print("Tokenizers are loaded successfully!")

In [None]:
"""
Import the cleaned, preprocessed and padded X independent features and the target variable y
"""
saved_version_data_control="13072020"
tokenization_history_folder="text_tokenization_padded_sequences"


def _load_padded_input(split_name, input_name, vocabulary_key):
    """Load the padded sequences of one model input for one split ("train" or "test")."""
    relative_path = "drive/My Drive/data/{0}_{1}/x_{2}_seq_{3}_80-20_non-balanced_{4}_{1}.npy".format(
        tokenization_history_folder,
        saved_version_data_control,
        split_name,
        input_name,
        words_tokenized[vocabulary_key])
    return np.load(os.path.join(os.getcwd(), relative_path))


def _load_target(split_name):
    """Load the target array of one split ("train" or "test")."""
    relative_path = "drive/My Drive/data/{0}_{1}/y_{2}_80-20_non-balanced_{1}.npy".format(
        tokenization_history_folder,
        saved_version_data_control,
        split_name)
    return np.load(os.path.join(os.getcwd(), relative_path))


# Training inputs, one array per model input.
X_train_seq_actors = _load_padded_input("train", "actors", 'actors_tokenized')
X_train_seq_plot = _load_padded_input("train", "plot", 'plot_words_tokenized')
X_train_seq_features = _load_padded_input("train", "features", 'features_words_tokenized')
X_train_seq_reviews = _load_padded_input("train", "reviews", 'reviews_words_tokenized')
X_train_seq_title = _load_padded_input("train", "title", 'title_words_tokenized')

print("X_train data inputs have been loaded!\n")

# Test inputs, same layout as the training inputs.
X_test_seq_actors = _load_padded_input("test", "actors", 'actors_tokenized')
X_test_seq_plot = _load_padded_input("test", "plot", 'plot_words_tokenized')
X_test_seq_features = _load_padded_input("test", "features", 'features_words_tokenized')
X_test_seq_reviews = _load_padded_input("test", "reviews", 'reviews_words_tokenized')
X_test_seq_title = _load_padded_input("test", "title", 'title_words_tokenized')

print("X_test data inputs have been loaded!\n")

# Targets of both splits.
y_train = _load_target("train")
y_test = _load_target("test")

print("y_train & y_test have been loaded!\n")

In [None]:
"""
Import genres
"""
# Only the folder part of the path depends on the two format arguments; the file name
# carries its own fixed stamp (06032020).
# NOTE: pickle.load can execute arbitrary code - only open files you produced yourself.
with open(os.path.join(os.getcwd(), 'drive/My Drive/data/{0}_{1}/genres_list_06032020.pkl'.format(tokenization_history_folder, saved_version_data_control)),'rb') as f:
    genres_list = pickle.load(f)
genres_list

## <b>- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  </b>

#### Python Cell no.1
--------------------------

Define the functions that will be used across the notebook. <br>
* visualize_model: This function is used to create a more informative image of the neural network structure.

* callback: The callbacks function with Early Stopping, Model checkpoints and Learning rate monitoring.

* save_model: Save the model's weights and structure at the end of the training

* plot_keras_history: Plot the learning curves of validation and training datasets across the training epochs

* hamming_loss: Calculate the Hamming loss value to evaluate the performance of the algorithm on test data. Not to be confused with the Hamming loss used as the performance metric during training.

In [None]:
# Function 1 - Visualize Neural Network structure.

def visualize_model(model):
    """Return an inline SVG drawing of the model graph, with layer names and shapes shown.

    The graph is produced by `model_to_dot` and rendered with the Graphviz `dot` program.
    """
    dot_graph = model_to_dot(model, show_shapes=True, show_layer_names=True, dpi=65)
    svg_markup = dot_graph.create(prog='dot', format='svg')
    return SVG(svg_markup)

#------------------------------------------#------------------------------------------

# Function 2 - Callback function with early stopping to avoid overfitting.

def callback(mode, folder_path, saved_model_name, patience_value, logdir, hparams):
    """Build the list of Keras callbacks used while fitting the model.

    Args:
        mode: when equal to "step decay", a LearningRateScheduler driven by
            `step_decay` is appended to the list.
        folder_path: folder (joined to the current working directory) where the
            best weights are checkpointed.
        saved_model_name: file name (without extension) of the `.h5` checkpoint.
        patience_value: number of epochs without improvement tolerated by EarlyStopping.
        logdir: log directory given to the TensorBoard and HParams callbacks.
        hparams: hyper-parameter values recorded by the HParams callback.

    Returns:
        A list with EarlyStopping, ModelCheckpoint, TensorBoard and hp.KerasCallback,
        followed by the LearningRateScheduler in "step decay" mode.
    """
    tracked_quantity = 'val_loss'
    log_level = 1
    checkpoint_file = os.path.join(os.getcwd(), '{0}/{1}.h5'.format(folder_path, saved_model_name))

    # Stop once val_loss has not improved by at least 0.009 for `patience_value` epochs,
    # and roll the model back to its best weights.
    early_stopping = EarlyStopping(monitor=tracked_quantity,
                                   min_delta=0.009,
                                   patience=patience_value,
                                   verbose=log_level,
                                   mode='min',
                                   restore_best_weights=True)

    # Keep only the weights of the best epoch (by val_loss) on disk.
    best_weights_checkpoint = ModelCheckpoint(filepath=checkpoint_file,
                                              monitor=tracked_quantity,
                                              verbose=log_level,
                                              save_best_only=True,
                                              save_weights_only=True)

    callbacks = [
        early_stopping,
        best_weights_checkpoint,
        tf.keras.callbacks.TensorBoard(logdir),
        hp.KerasCallback(logdir, hparams),
    ]

    if mode == "step decay":
        # The scheduler goes last, exactly where the step-decay variant had it.
        callbacks.append(tf.keras.callbacks.LearningRateScheduler(step_decay, verbose=1))

    return callbacks

#------------------------------------------#------------------------------------------

# Function 3 - Save the neural network's weights and structure

def save_model(model, folder_path, model_name):
    """Write the model's architecture (`.json`) and weights (`.h5`) into `folder_path`.

    Args:
        model: object exposing `to_json()` and `save_weights(path)`.
        folder_path: destination folder, joined to the current working directory.
        model_name: file name shared by both output files (without extension).

    NOTE(review): the value returned by `to_json()` is passed through `json.dump`;
    if it is already a JSON string, the file holds a quoted string and has to be
    read back with `json.load` - confirm against the loading code.
    """
    target_prefix = os.path.join(os.getcwd(), "{0}/{1}".format(folder_path, model_name))

    architecture = model.to_json()
    with open(target_prefix + ".json", "w") as architecture_file:
        json.dump(architecture, architecture_file)

    model.save_weights(target_prefix + ".h5")

    print("\nModel's weights are saved")

#------------------------------------------#------------------------------------------

# Function 4 - Plot the learning curves of the neural network

def _paint_axis_white(axis):
    """Colour labels, ticks, title and the left/bottom spines of one axis white."""
    axis.yaxis.label.set_color('white')
    axis.xaxis.label.set_color('white')

    axis.tick_params(axis='x', colors='white')
    axis.tick_params(axis='y', colors='white')

    axis.title.set_color('white')
    axis.spines['left'].set_color('white')
    axis.spines['bottom'].set_color('white')


def plot_keras_history(history, folder_path, embeddings_dimension, batch_size, lr_value, version_data_control):
    """Plot training and validation curves of the first three metrics and save them as a PNG.

    Args:
        history: mapping from metric name to its per-epoch values; validation
            series are looked up under the key "val_<metric>".
        folder_path: folder (joined to the current working directory) receiving the PNG.
        embeddings_dimension, batch_size, lr_value, version_data_control: values
            embedded in the PNG file name.

    The final training value of every plotted metric is also printed.
    """
    # Only the first three non-validation metrics are drawn, one per subplot.
    metrics_names = [key for key in history.keys() if not key.startswith('val_')][0:3]

    # A single figure is created here; the earlier extra `plt.gcf()` call opened a
    # second, empty figure that was never closed.
    fig, axs = plt.subplots(3, figsize=(20,18))

    for axis, metric in zip(axs, metrics_names):

        metric_train_values = history.get(metric, [])
        metric_val_values = history.get("val_{0}".format(metric), [])

        # Nothing to draw (and no last value to report) for an empty series.
        if len(metric_train_values) == 0:
            continue

        epochs = range(1, len(metric_train_values) + 1)

        # Leading spaces align this label with the validation label in the legend.
        training_text = "   Training {}: {:.5f}".format(metric,
                                                        metric_train_values[-1])
        print(training_text)

        axis.plot(epochs,
                  metric_train_values,
                  'b',
                  label=training_text,
                  marker=".")

        # Draw the validation curve only when the history holds one for this metric.
        if len(metric_val_values) > 0:

            validation_text = "Validation {}: {:.5f}".format(metric,
                                                             metric_val_values[-1])

            axis.plot(epochs,
                      metric_val_values,
                      'g',
                      label=validation_text,
                      marker=".")

        _paint_axis_white(axis)

        axis.set_title('Model Metric: {}'.format(metric))
        axis.set_xlabel('Epochs')
        axis.set_ylabel(metric.title())
        axis.legend()

    fig.savefig(os.path.join(os.getcwd(), '{0}/ploting_training_validation_performance_{1}dim_{2}batchsize_{3}lr_{4}.png'.format(folder_path, str(embeddings_dimension), str(batch_size), str(lr_value), version_data_control)), dpi=100)
    plt.show()
    # Close this specific figure so repeated calls inside the tuning loop do not pile up figures.
    plt.close(fig)
            
#------------------------------------------#------------------------------------------

# Function 5 - Calculate the Hamming loss of a multi-label dataset

#source:https://github.com/tensorflow/addons/issues/305

def hamming_loss(y_true, y_pred, mode='multilabel', threshold=None):
    """Compute the per-sample Hamming loss and print it.

    Args:
        y_true: ground-truth label indicators.
        y_pred: predicted label indicators, or probabilities when `threshold` is given.
        mode: 'multilabel' (fraction of label positions where y_true and y_pred
            differ) or 'multiclass' (1 minus the count of positions where both
            are non-zero).
        threshold: optional cut-off. When given, `y_pred` is binarised with
            `y_pred > threshold` before comparison. With the default (None) the
            inputs are compared as they are, so raw probabilities would count as
            a mismatch at nearly every position.

    Returns:
        A float32 tensor with one loss value per sample (reduction over the last axis).

    Raises:
        TypeError: if `mode` is neither 'multiclass' nor 'multilabel'.
    """
    if mode not in ['multiclass', 'multilabel']:
        raise TypeError('mode must be: [multiclass, multilabel])')

    if threshold is not None:
        # Turn probabilities into hard 0/1 decisions and align both dtypes.
        y_pred = tf.cast(tf.convert_to_tensor(y_pred) > threshold, tf.float32)
        y_true = tf.cast(y_true, tf.float32)

    if mode == 'multiclass':
        nonzero = tf.cast(tf.math.count_nonzero(y_true * y_pred, axis=-1), tf.float32)
        loss_value = 1.0 - nonzero
        print("Hamming loss for multi-class classification is: ", loss_value)
        return loss_value

    # Multi-label: share of label positions where prediction and truth disagree.
    nonzero = tf.cast(tf.math.count_nonzero(y_true - y_pred, axis=-1), tf.float32)
    loss_value = nonzero / y_true.shape[-1]
    print("Hamming loss for multi-label classification is: ", loss_value)
    return loss_value

class HammingLoss(tfa.metrics.MeanMetricWrapper):
    """Metric that wraps `hamming_loss` in a `tfa.metrics.MeanMetricWrapper`.

    `mode` is forwarded to `hamming_loss` on every call.
    """

    def __init__(self, name='hamming_loss', dtype=None, mode='multilabel'):
        super().__init__(hamming_loss, name, dtype=dtype, mode=mode)

#### Python Cell no.2
--------------------------

Initialize the hyper parameters of the model and the parameters used by the Neural Network not hyperparameter tuned.

**HyperParameters - Initialized**


In [None]:
# Folder where the HParams experiment configuration is written (same path as logging_directory above).
hp_logging_directory=os.path.join(os.getcwd(), "model_one/sgd/logs/hparam_tuning")

In [None]:
# NOTE: despite its Python name, HP_HIDDEN_UNITS is registered as 'batch_size'; create_fit_keras_model
# uses the same value both as the width of the dense hidden layer and as the fit/evaluate batch size.
HP_HIDDEN_UNITS = hp.HParam('batch_size', hp.Discrete([32, 64, 128]))
# Embedding width of every input except actors (the actors embedding is fixed to 16 in create_fit_keras_model).
HP_EMBEDDING_DIM = hp.HParam('embedding_dim', hp.Discrete([50, 100, 150]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01, 0.1])) # Adam default: 0.001, SGD default: 0.01, RMSprop default: 0.001

# Tag of the scalar logged per run; it holds the test-set Hamming loss, not an accuracy.
METRIC_ACCURACY = "hamming_loss"

In [None]:
# Register the tuned hyper-parameters and the tracked metric for the TensorBoard HParams dashboard.
with tf.summary.create_file_writer(hp_logging_directory).as_default():
    hp.hparams_config(
        hparams=[HP_HIDDEN_UNITS, HP_EMBEDDING_DIM, HP_LEARNING_RATE],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='hamming_loss')],
    )

# os.path.exists returns a boolean and does not raise for a missing path, so its
# result has to be tested explicitly.
if os.path.exists(hp_logging_directory):
    print("Directory of hyper parameters logging exists!")
else:
    print("Directory not found!")

**Initialize the parameters non-tuned and the optimizers**

In [None]:
# Containers for every setting that is NOT part of the hyper-parameter grid.
neural_network_parameters = {}
fit_parameters = {}
optimizer_parameters = {}

#======================================================================
#           PARAMETERS THAT DEFINE THE NEURAL NETWORK STRUCTURE       =
#======================================================================

neural_network_parameters['l2_regularization'] = 0.01
neural_network_parameters['dropout_rate'] = 0.1
neural_network_parameters['dense_activation'] = 'relu'
neural_network_parameters['output_activation'] = 'sigmoid'
# Number of output units = length of one target row.
neural_network_parameters['number_target_variables'] = y_train[0].shape[-1]

# Probability above which a sigmoid output counts as a predicted label.
# Without an explicit threshold the tfa metrics below binarise predictions by keeping only
# the arg-max label, which under-counts predictions when samples carry several labels.
neural_network_parameters['prediction_threshold'] = 0.5

neural_network_parameters['model_loss'] = tf.keras.losses.BinaryCrossentropy(from_logits=False, name='binary_crossentropy')

# Order matters: create_fit_keras_model reads the Hamming loss as the first metric of model.evaluate().
neural_network_parameters['model_metric'] = [tfa.metrics.HammingLoss(mode="multilabel",
                                                                     threshold=neural_network_parameters['prediction_threshold'],
                                                                     name="hamming_loss"),
                                             tfa.metrics.F1Score(y_train[0].shape[-1],
                                                                 average="micro",
                                                                 threshold=neural_network_parameters['prediction_threshold'],
                                                                 name="f1_score_micro"),
                                             tfa.metrics.F1Score(y_train[0].shape[-1],
                                                                 average=None,
                                                                 threshold=neural_network_parameters['prediction_threshold'],
                                                                 name="f1_score_none")]

#---------------------------------------------------------------------------------------

#===================================================
#       PARAMETERS THAT DEFINE EACH OPTIMIZER      =
#===================================================

# Define a function to monitor the learning rate per epoch (Option 1)
def get_lr_metric(optimizer):
    """Return a metric-shaped function that reports `optimizer.lr`.

    The returned function takes the usual (y_true, y_pred) pair but ignores both.
    It is deliberately named `lr`.
    """
    def lr(_y_true, _y_pred):
        # Both arguments are unused; only the optimizer's current learning rate is reported.
        current_rate = optimizer.lr
        return current_rate

    return lr

# Learning rate schedule (Option 2)
def step_decay(epoch):
    """Step-decay schedule: halve the initial learning rate once every 20 epochs.

    The initial rate is read from the module-level `hparams` mapping, so that
    mapping must exist before the schedule is called.
    """
    # Number of completed 20-epoch blocks, counting epochs from 1.
    completed_drops = math.floor((1 + epoch) / 20.0)
    return hparams[HP_LEARNING_RATE] * math.pow(0.5, completed_drops)

# Optimizer: ADAM (Learning scheduler with Inverse Time Decay)

optimizer_parameters['lr_scheduler_decay_rate'] = 0.1
optimizer_parameters['staircase'] = False
# Share of the training rows counted when deriving steps per epoch; the validation share
# (fit_parameters['validation_data_ratio']) is defined as 1 minus this value.
optimizer_parameters['validation_split_ratio']=0.7

def optimizer_adam_v2(haparms):
    """Build an Adam optimizer whose learning rate follows an InverseTimeDecay schedule.

    Args:
        haparms: hyper-parameter mapping of the current run (the parameter name
            is kept as spelt for backward compatibility). It must hold
            HP_LEARNING_RATE, HP_HIDDEN_UNITS and HP_DECAY_STEPS_MULTIPLIER.

    NOTE(review): HP_DECAY_STEPS_MULTIPLIER is not defined in the part of the
    notebook visible here - confirm it exists before selecting this optimizer.
    """
    # Use the mapping that was passed in instead of silently relying on a global `hparams`.
    hparams = haparms

    # Training steps per epoch, multiplied by the tuned multiplier.
    decay_steps = int(np.ceil((X_train_seq_actors.shape[0]*optimizer_parameters['validation_split_ratio'])//hparams[HP_HIDDEN_UNITS]))*hparams[HP_DECAY_STEPS_MULTIPLIER]

    print("Decay Steps of Inverse Time Decay: {0}".format(decay_steps))

    return keras.optimizers.Adam(tf.keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=hparams[HP_LEARNING_RATE],
        decay_steps=decay_steps,
        decay_rate=optimizer_parameters['lr_scheduler_decay_rate'],
        staircase=optimizer_parameters['staircase']))

#---------------------------------------------------------------------------------------

# Optimizer: SGD (version 1)

optimizer_parameters['SGD_momentum'] = 0.2 #default 0.0
optimizer_parameters['SGD_nesterov'] = True #default False

def optimizer_sgd_v1(haparms, mode):
    """Build the SGD optimizer of the current run.

    Args:
        haparms: hyper-parameter mapping of the current run (the parameter name
            is kept as spelt for backward compatibility); HP_LEARNING_RATE is
            read from it outside "step decay" mode.
        mode: "step decay" returns an optimizer whose learning rate starts at 0
            and is expected to be set every epoch by a LearningRateScheduler
            callback; any other value returns a fixed-rate Nesterov SGD.
    """
    # Use the mapping that was passed in instead of silently relying on a global `hparams`.
    hparams = haparms

    if mode=="step decay":
        # The rate is set to 0 to make clear it is not used: the scheduler callback
        # overrides it. If that callback is missing, the weights never move.
        # A large momentum keeps updates moving in the right direction once the
        # scheduled rate shrinks to small values.
        return keras.optimizers.SGD(learning_rate=0.0,
                                    momentum=0.9)

    return keras.optimizers.SGD(learning_rate=hparams[HP_LEARNING_RATE],
                                momentum=optimizer_parameters['SGD_momentum'],
                                nesterov=optimizer_parameters['SGD_nesterov'])

#---------------------------------------------------------------------------------------

# Optimizer: RMSprop (version 1)

optimizer_parameters['RMSprop_momentum'] = 0.5
optimizer_parameters['RMSprop_centered'] = True

def optimizer_rmsprop_v1(haparms):
    """Build a centered RMSprop optimizer with the learning rate of the current run.

    Args:
        haparms: hyper-parameter mapping of the current run (the parameter name
            is kept as spelt for backward compatibility); HP_LEARNING_RATE is read from it.
    """
    # Use the mapping that was passed in instead of silently relying on a global `hparams`.
    hparams = haparms

    return keras.optimizers.RMSprop(learning_rate=hparams[HP_LEARNING_RATE],
                                    momentum=optimizer_parameters['RMSprop_momentum'],
                                    centered=optimizer_parameters['RMSprop_centered'])

#--------------------------------------------------------------------------------------

#==================================================
#       PARAMETERS THAT DEFINE THE MODEL FIT      =
#==================================================

# Upper bound on the number of training epochs passed to model.fit.
fit_parameters["epoch"] = 150
# presumably forwarded to callback(...) as the EarlyStopping patience - confirm at the call site.
fit_parameters["patience_value"] = 10
fit_parameters["verbose_fit"] = 1
# Share of the training rows held out by model.fit(validation_split=...): 1 - 0.7.
fit_parameters['validation_data_ratio']=1-optimizer_parameters['validation_split_ratio']

#### Python Cell no.3
--------------------------

The function creates the neural network structure, and fits the model on the dataset.

In [None]:
# Initialize variables specific to the training of the model

# Prefix of the Keras model name (the run's hyper-parameters are appended to it).
sequential_model_name="MultyInput_Keras_Classification_model"
# Prefix of the PNG file holding the plotted network structure.
network_structure_file_name="network_structure_multy_input_keras"
# Output folder of this notebook, relative to the current working directory.
folder_path_model_saved="drive/My Drive/model_one/sgd_{0}".format(version_data_control)
# Prefix of the saved architecture (.json) and weights (.h5) files.
saved_model_name="multi_input_keras_model"
# Not used in the cells visible here.
saved_metrics_dataframe_name="metrics_histogram_multi_input_keras"

In [None]:
def _embedding_branch(branch_input, vocabulary_size, embedding_size, sequence_length, embedding_name, pooling_name):
    """Embed one padded token sequence and reduce it with GlobalMaxPooling1D."""
    embedded = layers.Embedding(input_dim = vocabulary_size + 2,
                                output_dim = embedding_size,
                                embeddings_initializer = 'uniform',
                                mask_zero = True,
                                input_length = sequence_length,
                                name=embedding_name)(branch_input)

    return layers.GlobalMaxPooling1D(name=pooling_name)(embedded)


def create_fit_keras_model(hparams,
                           version_data_control,
                           optimizer_name,
                           validation_method,
                           callbacks,
                           optimizer_version = None):
    """Build, train, save and evaluate the five-input Keras classifier for one hyper-parameter set.

    The network embeds and max-pools each of the five text inputs (actors, plot,
    features, reviews, title), concatenates the pooled vectors, and passes them
    through one L2-regularised dense layer, a dropout layer and the sigmoid
    output layer.

    Args:
        hparams: mapping holding HP_EMBEDDING_DIM, HP_HIDDEN_UNITS and HP_LEARNING_RATE.
            HP_HIDDEN_UNITS is used both as the dense layer width and as the batch size.
        version_data_control: stamp appended to the model and file names.
        optimizer_name: "adam", "sgd" or "rmsprop". "sgd" is always built in
            "step decay" mode, so `callbacks` must contain the matching
            LearningRateScheduler.
        validation_method: "validation_split" (hold out part of the training
            data) or "validation_data" (validate on the test arrays).
        callbacks: Keras callbacks passed to `model.fit`.
        optimizer_version: must be None; no other optimizer version is implemented.

    Returns:
        Tuple (hamming_loss_value, model, fitted_model): the first metric
        reported by `model.evaluate` on the test arrays, the trained model and
        the object returned by `model.fit`.

    Raises:
        ValueError: for an unsupported optimizer_name / optimizer_version / validation_method.
    """
    # Fail fast with a clear message instead of a NameError after the model has been built.
    if optimizer_version is not None or optimizer_name not in ("adam", "sgd", "rmsprop"):
        raise ValueError("Unsupported optimizer: name={0!r}, version={1!r}".format(optimizer_name, optimizer_version))

    if validation_method not in ("validation_split", "validation_data"):
        raise ValueError("validation_method must be 'validation_split' or 'validation_data', got {0!r}".format(validation_method))

    embedding_dim = hparams[HP_EMBEDDING_DIM]
    batch_size = hparams[HP_HIDDEN_UNITS]

    # Suffix shared by the model name, the structure plot and the saved files.
    run_suffix = '{0}dim_{1}batchsize_{2}lr_{3}'.format(str(embedding_dim),
                                                        str(batch_size),
                                                        str(hparams[HP_LEARNING_RATE]),
                                                        version_data_control)

    train_inputs = [X_train_seq_actors, X_train_seq_plot, X_train_seq_features, X_train_seq_reviews, X_train_seq_title]
    test_inputs = [X_test_seq_actors, X_test_seq_plot, X_test_seq_features, X_test_seq_reviews, X_test_seq_title]

    sentenceLength_actors = X_train_seq_actors.shape[1]
    sentenceLength_plot = X_train_seq_plot.shape[1]
    sentenceLength_features = X_train_seq_features.shape[1]
    sentenceLength_reviews = X_train_seq_reviews.shape[1]
    sentenceLength_title = X_train_seq_title.shape[1]

    # One input tensor per text source; its length is the padded sequence length.
    actors = keras.Input(shape=(sentenceLength_actors,), name='actors_input')
    plot = keras.Input(shape=(sentenceLength_plot,), name='plot_input')
    features = keras.Input(shape=(sentenceLength_features,), name='features_input')
    reviews = keras.Input(shape=(sentenceLength_reviews,), name='reviews_input')
    title = keras.Input(shape=(sentenceLength_title,), name='title_input')

    # The actors embedding keeps a fixed width of 16; the other four use the tuned width.
    encoded_layer1 = _embedding_branch(actors, len(actors_tokenizer.word_index), 16, sentenceLength_actors,
                                       "actors_embedding_layer", "globalmaxpooling_actors_layer")

    encoded_layer2 = _embedding_branch(plot, len(plot_tokenizer.word_index), embedding_dim, sentenceLength_plot,
                                       "plot_embedding_layer", "globalmaxpooling_plot_summary_Layer")

    encoded_layer3 = _embedding_branch(features, len(features_tokenizer.word_index), embedding_dim, sentenceLength_features,
                                       "features_embedding_layer", "globalmaxpooling_movie_features_layer")

    encoded_layer4 = _embedding_branch(reviews, len(reviews_tokenizer.word_index), embedding_dim, sentenceLength_reviews,
                                       "reviews_embedding_layer", "globalmaxpooling_user_reviews_layer")

    encoded_layer5 = _embedding_branch(title, len(title_tokenizer.word_index), embedding_dim, sentenceLength_title,
                                       "title_embedding_layer", "globalmaxpooling_movie_title_layer")

    merged = layers.concatenate([encoded_layer1, encoded_layer2, encoded_layer3, encoded_layer4, encoded_layer5], axis=-1)

    dense_layer_1 = layers.Dense(batch_size,
                                 kernel_regularizer=regularizers.l2(neural_network_parameters['l2_regularization']),
                                 activation=neural_network_parameters['dense_activation'],
                                 name="1st_dense_hidden_layer_concatenated_inputs")(merged)

    # The dropout output must feed the next layer; if the result is discarded,
    # dropout is never part of the graph.
    dropout_layer = layers.Dropout(neural_network_parameters['dropout_rate'])(dense_layer_1)

    output_layer = layers.Dense(neural_network_parameters['number_target_variables'],
                                activation=neural_network_parameters['output_activation'],
                                name='output_layer')(dropout_layer)

    model = keras.Model(inputs=[actors, plot, features, reviews, title],
                        outputs=output_layer,
                        name='{0}_{1}'.format(sequential_model_name, run_suffix))

    # summary() prints by itself; wrapping it in print() only adds a stray "None".
    model.summary()

    if optimizer_name=="adam":
        optimizer = optimizer_adam_v2(hparams)
    elif optimizer_name=="sgd":
        optimizer = optimizer_sgd_v1(hparams, "step decay")
    else:
        optimizer = optimizer_rmsprop_v1(hparams)

    # A schedule-based learning rate is a schedule object without a dtype, hence getattr.
    lr_dtype = getattr(optimizer.lr, 'dtype', None)
    print("Type of optimizer LR: {0}".format(lr_dtype))

    lr_metric = [get_lr_metric(optimizer)]

    if lr_dtype == np.float32:

        print("Learning Rate's type is Float or Integer")

        # A plain float learning rate can be tracked as an extra metric.
        model.compile(optimizer=optimizer,
                      loss=neural_network_parameters['model_loss'],
                      metrics=neural_network_parameters['model_metric'] + lr_metric, )
    else:
        print("Learning Rate's type is not Float or Integer, but rather {0}".format(type(optimizer.lr)))
        model.compile(optimizer=optimizer,
                      loss=neural_network_parameters['model_loss'],
                      metrics=neural_network_parameters['model_metric'])

    #plot model's structure
    plot_model(model, to_file=os.path.join(os.getcwd(), '{0}/{1}_{2}.png'.format(folder_path_model_saved,
                                                                                 network_structure_file_name,
                                                                                 run_suffix)))
    start_time = time.time()

    # NOTE(review): the step count is derived from the 70% training share in both validation
    # modes; in "validation_data" mode that covers only part of the training rows per epoch - confirm intent.
    steps_per_epoch=int(np.ceil((X_train_seq_actors.shape[0]*optimizer_parameters['validation_split_ratio'])//batch_size))

    print("\nSteps per epoch on current run: {0}".format(steps_per_epoch))

    if validation_method=="validation_split":

        fitted_model=model.fit(train_inputs,
                               y_train,
                               steps_per_epoch=steps_per_epoch,
                               epochs=fit_parameters["epoch"],
                               batch_size=batch_size,
                               validation_split=fit_parameters['validation_data_ratio'],
                               callbacks=callbacks,
                               use_multiprocessing=True
                              )

    else:

        fitted_model=model.fit(train_inputs,
                               y_train,
                               steps_per_epoch=steps_per_epoch,
                               epochs=fit_parameters["epoch"],
                               verbose=fit_parameters["verbose_fit"],
                               batch_size=batch_size,
                               validation_data=(test_inputs, y_test),
                               callbacks=callbacks
                              )
    #save the model
    save_model(model,
               folder_path_model_saved,
               "{0}_{1}".format(saved_model_name, run_suffix))
    elapsed_time = time.time() - start_time

    print("\nTraining time of the multi-input keras model has finished. Duration {} secs".format(format_timespan(elapsed_time)))

    # evaluation[0] is the loss; the metrics follow in the order they were compiled.
    evaluation = model.evaluate(test_inputs, y_test, batch_size=batch_size, verbose=2)

    hamming_loss_value=evaluation[1]
    print('Hamming loss value on test data: {0}'.format(hamming_loss_value))

    return hamming_loss_value, model, fitted_model

#### Python Cell no.4
--------------------------

*run* is a function that calls the above function *create_fit_keras_model* and logs the resulting Hamming loss for the HParams dashboard.

In [None]:
def run(run_dir, hparams, version_data_control, optimizer_name, validation_method, callbacks):
    """Train one hyper-parameter configuration and log its test metric.

    A summary file writer is opened on ``run_dir``; while it is the default
    writer, the hyper-parameters are recorded with ``hp.hparams`` (``hp`` is
    presumably the TensorBoard HParams API -- confirm against the imports),
    the model is built and fitted by ``create_fit_keras_model`` and the
    returned Hamming loss is written as a scalar summary.

    Args:
        run_dir: Directory the summary writer writes to.
        hparams: Hyper-parameter values of this trial, passed on unchanged
            to ``hp.hparams`` and ``create_fit_keras_model``.
        version_data_control: Version tag forwarded to ``create_fit_keras_model``.
        optimizer_name: Optimizer identifier forwarded to ``create_fit_keras_model``.
        validation_method: Validation strategy forwarded to ``create_fit_keras_model``.
        callbacks: Callbacks forwarded to ``create_fit_keras_model``.

    Returns:
        The ``(model, fitted_model)`` pair produced by ``create_fit_keras_model``.
    """
    summary_writer = tf.summary.create_file_writer(run_dir)

    with summary_writer.as_default():
        # Record the hyper-parameter values used in this trial.
        hp.hparams(hparams)

        training_outcome = create_fit_keras_model(hparams,
                                                  version_data_control,
                                                  optimizer_name,
                                                  validation_method,
                                                  callbacks)
        test_hamming_loss, trained_model, fit_history = training_outcome

        # The summary tag is METRIC_ACCURACY, but the value logged is the Hamming loss.
        tf.summary.scalar(METRIC_ACCURACY, test_hamming_loss, step=2)

    return trained_model, fit_history

#### Python Cell no.5
--------------------------

#### Prior to fitting the model: 

* X_train, X_test should have the form of an array with sequence of numbers.
* y_train, y_test should have the form of a multi-hot encoded dataframe.

<b> General observations: </b>

* Reducing batch size can produce a better model (I should grid search on batch size).
* Reducing the general number of parameters can produce better results.
* Removing the second dense layer improved the results.
* Removing regularization also affected the results.

Everything is set. In the next coding cells the training is executed! <br>
<b>- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  </b>

In [None]:
# Make sure the folder that receives the saved models/metrics exists.
# The path is resolved against the current working directory.
model_output_folder = os.path.join(os.getcwd(), folder_path_model_saved)

if os.path.isdir(model_output_folder):
    print("Folder already exists!\n")
else:
    print("Folder not found!\n")
    # makedirs also creates missing parent folders (os.mkdir would fail on a
    # nested path); exist_ok avoids an error if the folder appears in between.
    os.makedirs(model_output_folder, exist_ok=True)
    print("Folder is created!\n")

In [None]:
"""
Model Training
"""
begin_time=time.time()
print("{0}: Start execution of the cell\n".format(datetime.utcnow().strftime(date_format)))

session_num = 1

# The size of the grid is the same for every trial, so compute it once.
total_number_models=(len(HP_HIDDEN_UNITS.domain.values)*len(HP_EMBEDDING_DIM.domain.values)*len(HP_LEARNING_RATE.domain.values))

# Test inputs, in the same order that is used for validation_data when fitting.
test_inputs = [X_test_seq_actors, X_test_seq_plot, X_test_seq_features, X_test_seq_reviews, X_test_seq_title]

# NOTE: the values of HP_HIDDEN_UNITS are used as the batch size in this notebook.
for batch_size in HP_HIDDEN_UNITS.domain.values:
    for embedding_dim in HP_EMBEDDING_DIM.domain.values:
        for learning_rate in HP_LEARNING_RATE.domain.values:
            hparams = {
                HP_HIDDEN_UNITS: batch_size,
                HP_EMBEDDING_DIM: embedding_dim,
                HP_LEARNING_RATE: learning_rate,
              }
            run_name = "run-id {0}".format(session_num)
            print('--- Starting trial: {0}/{1}\n'.format(run_name, total_number_models))
            print({h.name: hparams[h] for h in hparams}, '\n')

            # Suffix shared by every artifact (saved model, metrics dataframe) of this trial.
            trial_suffix = "{0}dim_{1}batchsize_{2}lr_{3}".format(str(embedding_dim),
                                                                  str(batch_size),
                                                                  str(learning_rate),
                                                                  version_data_control)
            trial_model_name = "{0}_{1}".format(saved_model_name, trial_suffix)

            starting_training=time.time()

            # Train the model for this combination and log it under its own run directory.
            model_struture, model_history=run('{0}/'.format(hp_logging_directory) + run_name,
                                              hparams,
                                              version_data_control,
                                              "sgd",
                                              "validation_split",
                                              callback("step decay",
                                                       folder_path_model_saved,
                                                       trial_model_name,
                                                       fit_parameters["patience_value"],
                                                       "{0}/".format(hp_logging_directory) + datetime.now().strftime("%Y%m%d-%H%M%S"),
                                                       hparams))

            # The elapsed time covers everything done inside run(), not only the epochs themselves.
            print("Average time per epoch: {0}\n".format(format_timespan((time.time()-starting_training)/len(model_history.epoch))))

            # Per-epoch metrics table, numbered from 1 instead of 0.
            hist = pd.DataFrame(model_history.history)
            hist['epoch'] = model_history.epoch
            hist['epoch']+= 1
            hist.index += 1
            print("Table of training the {0} text classification model\n".format(sequential_model_name))
            print(tabulate(hist, headers='keys', tablefmt='psql'))

            hist.to_pickle(os.path.join(os.getcwd(),
                                        folder_path_model_saved,
                                        "{0}_{1}.pkl".format(saved_metrics_dataframe_name, trial_suffix)))

            #plot the model's model_metric (Hamming Loss, F1-score) & loss
            plot_keras_history(model_history.history, folder_path_model_saved, embedding_dim, batch_size, learning_rate, version_data_control)

            #evaluate the model
            model_evaluation = model_struture.evaluate(test_inputs,
                                                        y_test,
                                                        batch_size=batch_size,
                                                        verbose=2)

            print("\nTest Score (evalution of the model's loss/error on the test sequences): {0}".format(model_evaluation[0]))
            print("\nTest model_metric (evalution of the hamming loss on the test sequences): {0}\n".format(model_evaluation[1]))

            # Index 3 holds the per-genre F1 scores (metric order: loss, hamming loss,
            # f1_score_micro, f1_score_none -- as noted in create_fit_keras_model).
            f1_per_genre = model_evaluation[3]
            bar_positions = np.arange(len(genres_list))

            # The figure size must be configured BEFORE the figure is created,
            # otherwise the first trial's chart is drawn with the previous default size.
            plt.rcParams["figure.figsize"] = (16,13)
            fig, ax = plt.subplots()

            ax.bar(bar_positions, f1_per_genre)

            ax.set_title('F1 score per genre tag')
            ax.set_xlabel('Movie Genre')
            ax.set_ylabel('F1 score')

            # Pin the tick positions before assigning the labels so that every
            # label is guaranteed to sit under its own bar.
            ax.set_xticks(bar_positions)
            ax.set_xticklabels(genres_list, rotation=45, ha='right')

            # White axes/labels: the chart is styled for a dark background.
            ax.spines['left'].set_color('white')
            ax.spines['bottom'].set_color('white')

            ax.tick_params(axis='x', colors='white')
            ax.tick_params(axis='y', colors='white')

            ax.yaxis.label.set_color('white')
            ax.xaxis.label.set_color('white')
            ax.title.set_color('white')

            # Write the rounded F1 value on top of each bar.
            for i, f1 in enumerate(f1_per_genre):
                ax.annotate(round(f1, 2), (i, f1), ha='center', va='bottom')

            plt.show()
            # One figure is created per trial; release it so they do not pile up in memory.
            plt.close(fig)

            session_num += 1

total_time=time.time() - begin_time
print("{0}: Total cell execution time: {1}".format(datetime.utcnow().strftime(date_format), format_timespan(total_time)))