**Load Libs**

In [83]:
# Native python libs
import os
import math
from functools import lru_cache
from datetime import datetime
from typing import Any, Union, NoReturn

In [84]:
# pip installed libs
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
import sklearn
import kerastuner_tensorboard_logger as kt_logger

**Paths**

In [85]:
BASE_PATH = f"{os.path.abspath('')}"

**Kaggle**

In [86]:
KAGGLE = False

In [87]:
KAGGLE_PATH = "/kaggle" if KAGGLE else f"{BASE_PATH}\\kaggle"

In [88]:
def submission_path_exists() -> str:
    directory = f"{KAGGLE_PATH}\\working\\{datetime.now().strftime('%d%m%Y')}"
    if not os.path.exists(directory):
        os.mkdir(directory)
        print(f"Created new output directory for today at '{directory}'")
    return directory

In [89]:
INPUT_PATH = f"{KAGGLE_PATH}\\input\\goodreads-books-reviews-290312"
OUTPUT_PATH = submission_path_exists()
SUBMISSION_PATH = f"{OUTPUT_PATH}\\{datetime.now().strftime('%H%M%S')}_submission.csv"

**Tensorboard & General Monitoring**

In [90]:
TENSORBOARD_LOGS_PATH = f"{BASE_PATH}\\tensorboard_logs"
KERAS_TUNER_MONITOR_PATH = f"{OUTPUT_PATH}\\keras_tuner_monitoring"
MONITOR_PATH = f"{OUTPUT_PATH}\\monitoring.csv"

In [91]:
# Machine Learning tensorboard paths
TENSORBOARD_LOGS_PATH_ML = f"{TENSORBOARD_LOGS_PATH}\\ML"
LINEAR = f"{TENSORBOARD_LOGS_PATH_ML}\\Linear"
MLP = f"{TENSORBOARD_LOGS_PATH_ML}\\MLP"

In [92]:
# Deep Learning tensorboard paths
TENSORBOARD_LOGS_PATH_DL = f"{TENSORBOARD_LOGS_PATH}\\DL"
CNN = f"{TENSORBOARD_LOGS_PATH_DL}\\CNN"
RESNET = f"{TENSORBOARD_LOGS_PATH_DL}\\ResNet"
RNN = f"{TENSORBOARD_LOGS_PATH_DL}\\RNN"
SIMPLE_RNN = f"{TENSORBOARD_LOGS_PATH_DL}\\SimpleRNN"
TRANSFORMER = f"{TENSORBOARD_LOGS_PATH_DL}\\Transformer"

In [93]:
# Test if path is good
os.path.abspath(TENSORBOARD_LOGS_PATH)

'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\tensorboard_logs'

**Hyperparameters**

In [94]:
# Fix
CLASSES = 6

In [95]:
# Adjustable
BATCH_SIZE = 1024  # Big batch size, small learning rate
VOCAB_SIZE = 20000
SEQUENCE_LENGTH = 256
EMBEDDING_DIMS = 128
EPOCHS = 100
TRIALS = 10

**Load Datasets**

In [96]:
train_dataset = pd.read_csv(f"{INPUT_PATH}\\goodreads_train.csv",
                            usecols=['review_text', 'rating'])

In [97]:
test_dataset = pd.read_csv(f"{INPUT_PATH}\\goodreads_test.csv",
                           usecols=['review_text'])

**GPU/TPU MultiThreading Setup**

In [98]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    strategy = tf.distribute.experimental.TPUStrategy
except ValueError:
    strategy = tf.distribute.get_strategy()
    print('Number of replicas:', strategy.num_replicas_in_sync)

Number of replicas: 1


In [99]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    tpu = None
    gpus = tf.config.experimental.list_logical_devices("GPU")

In [100]:
if tpu:
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu, )
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    strategy = tf.distribute.MultiWorkerMirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()
    print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)

Running on single GPU  /device:GPU:0
Number of accelerators:  1


**NLP**

In [101]:
# Create a TextVectorization layer
vectorize_layer = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE,
                                                    standardize=None,
                                                    output_sequence_length=SEQUENCE_LENGTH,
                                                    output_mode='int')

In [102]:
%%time
with strategy.scope():
    vectorize_layer.adapt(train_dataset['review_text'], batch_size=BATCH_SIZE * strategy.num_replicas_in_sync)

CPU times: total: 20.2 s
Wall time: 46.7 s


In [103]:
vectorize_layer.get_vocabulary()[5]

'to'

In [104]:
def vectorize_text(text: Any, label: Any) -> Any:
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text), label

**Creating Dataset For Models**

In [105]:
train_dataset, validation_dataset = sklearn.model_selection.train_test_split(train_dataset, test_size=0.2)

In [106]:
def dataset_from_raw_data(x: np.ndarray, y: np.ndarray, batch_size: int = BATCH_SIZE) -> Any:
    # Create dataset
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    # Vectorize
    dataset = dataset.map(vectorize_text)
    print(dataset.element_spec)
    return dataset

In [107]:
train_dataset = dataset_from_raw_data(train_dataset['review_text'], train_dataset['rating'])

(TensorSpec(shape=(None, 256), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))


In [108]:
validation_dataset = dataset_from_raw_data(validation_dataset['review_text'], validation_dataset['rating'])

(TensorSpec(shape=(None, 256), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))


**Linear**

In [109]:
def linear(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(CLASSES, activation='sigmoid'),
    ])

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**MLP**

In [110]:
def mlp(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    model.add(tf.keras.layers.GlobalAveragePooling1D())
    model.add(tf.keras.layers.Dense(units=hp.Int('units_0', min_value=32, max_value=512, step=32),
                                    activation='relu'))
    model.add(tf.keras.layers.Dense(units=hp.Int('units_1', min_value=32, max_value=512, step=32),
                                    activation='relu'))
    model.add(tf.keras.layers.Dense(CLASSES, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**CNN**

In [111]:
def cnn(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(SEQUENCE_LENGTH,), dtype=tf.int32))
    model.add(tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    model.add(tf.keras.layers.Reshape((math.isqrt(SEQUENCE_LENGTH), math.isqrt(SEQUENCE_LENGTH), -1),
                                      input_shape=(None, SEQUENCE_LENGTH)))

    # Conv & pooling tf.keras.layers
    hp_filters_0 = hp.Int('filters_0', min_value=8, max_value=32, step=8)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_0, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_0, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())
    hp_filters_1 = hp.Int('filters_1', min_value=16, max_value=64, step=16)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_1, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_1, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())
    hp_filters_2 = hp.Int('filters_2', min_value=32, max_value=128, step=32)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_2, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_2, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())
    hp_filters_3 = hp.Int('filters_3', min_value=64, max_value=256, step=64)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_3, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_3, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())

    # Fully connected tf.keras.layers
    model.add(tf.keras.layers.Flatten())
    hp_units_0 = hp.Int('units_0', min_value=64, max_value=256, step=64)
    model.add(tf.keras.layers.Dense(units=hp_units_0, activation='relu'))
    hp_units_1 = hp.Int('units_1', min_value=32, max_value=128, step=32)
    model.add(tf.keras.layers.Dense(units=hp_units_1, activation='relu'))
    hp_units_2 = hp.Int('units_2', min_value=16, max_value=64, step=16)
    model.add(tf.keras.layers.Dense(units=hp_units_2, activation='relu'))
    model.add(tf.keras.layers.Dense(CLASSES, activation='softmax'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**ResNet**

In [112]:
def residual_module(data,
                    filters,
                    stride,
                    reduce,
                    reg,
                    bn_eps,
                    bn_momentum):
    shortcut = 0
    bn_1 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(data)
    act_1 = tf.keras.layers.ReLU()(bn_1)
    conv_1 = tf.keras.layers.Conv2D(filters=int(filters / 4.),
                                    kernel_size=(1, 1),
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_1)
    bn_2 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(conv_1)
    act_2 = tf.keras.layers.ReLU()(bn_2)
    conv_2 = tf.keras.layers.Conv2D(filters=int(filters / 4.),
                                    kernel_size=(3, 3),
                                    strides=stride,
                                    padding='same',
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_2)
    bn_3 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(conv_2)
    act_3 = tf.keras.layers.ReLU()(bn_3)
    conv_3 = tf.keras.layers.Conv2D(filters=filters,
                                    kernel_size=(1, 1),
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_3)

    if reduce:
        shortcut = tf.keras.layers.Conv2D(filters=filters,
                                          kernel_size=(1, 1),
                                          strides=stride,
                                          use_bias=False,
                                          kernel_regularizer=tf.keras.regularizers.l2(reg))(act_1)

    x = tf.keras.layers.Add()([conv_3, shortcut])
    return x

In [113]:
def resnet(hp: kt.HyperParameters) -> tf.keras.Sequential:
    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,))
    x = inputs
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(x)
    x = tf.keras.layers.Reshape((SEQUENCE_LENGTH,))(x)
    x = tf.keras.layers.BatchNormalization(axis=-1,
                                           epsilon=bn_eps,
                                           momentum=bn_momentum)(inputs)
    x = tf.keras.layers.Conv2D(filters[0], (3, 3),
                               use_bias=False,
                               padding='same',
                               kernel_regularizer=tf.keras.regularizers.l2(reg))(x)
    for i in range(len(stages)):
        stride = (1, 1) if i == 0 else (2, 2)
        x = residual_module(data=x, filters=filters[i + 1], stride=stride,
                            reduce=True, bn_eps=bn_eps, bn_momentum=bn_momentum)
        for j in range(stages[i] - 1):
            x = residual_module(data=x,
                                filters=filters[i + 1],
                                stride=(1, 1),
                                bn_eps=bn_eps,
                                bn_momentum=bn_momentum)
    x = tf.keras.layers.BatchNormalization(axis=-1,
                                           epsilon=bn_eps,
                                           momentum=bn_momentum)(x)
    x = tf.keras.layers.ReLU()(x)
    x = tf.keras.layers.AveragePooling2D((8, 8))(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(CLASSES, activation="softmax", kernel_regularizer=tf.keras.regularizers.l2(reg))(x)
    model = tf.keras.Model(inputs, x)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**RNN**

In [114]:
def rnn(hp: kt.HyperParameters) -> tf.keras.Sequential:
    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,), dtype=tf.int32)
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(inputs)
    for i in range(hp.Int('num_layers', min_value=1, max_value=3, step=1)):
        x = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hp.Int('units_' + str(i), min_value=32, max_value=512, step=1),
                                 return_sequences=True))(x)
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hp.Int('lstm_units', min_value=16, max_value=64, step=1),
                             return_sequences=False))(x)
    x = tf.keras.layers.Dense(hp.Int('dense_units', min_value=16, max_value=64, step=1),
                                  activation='relu')(x)
    x = tf.keras.layers.Dense(CLASSES, activation='softmax')(x)
    model = tf.keras.Model(inputs, x)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

In [115]:
def rnn_test() -> tf.keras.Sequential:
    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,), dtype=tf.int32)
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(inputs)
    for i in range(2):
        x = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(32, return_sequences=True))(x)
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(16, return_sequences=False))(x)
    x = tf.keras.layers.Dense(16, activation='relu')(x)
    outputs = tf.keras.layers.Dense(CLASSES, activation='softmax')(x)
    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**Transformer**

In [35]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = tf.keras.layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = tf.keras.layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(res)
    x = tf.keras.layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = tf.keras.layers.Dropout(dropout)(x)
    x = tf.keras.layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [37]:
def transformer(hp: kt.HyperParameters) -> tf.keras.Sequential:
    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,))
    x = inputs
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(x)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    x = tf.keras.layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = tf.keras.layers.Dense(dim, activation="relu")(x)
        x = tf.keras.layers.Dropout(mlp_dropout)(x)
    outputs = tf.keras.layers.Dense(CLASSES, activation="softmax")(x)
    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

In [124]:
def transformer_test() -> tf.keras.Sequential:
    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,))
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(inputs)
    for _ in range(2):
        x = transformer_encoder(x, 32, 2, 32, 0.1)
    x = tf.keras.layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in [32]:
        x = tf.keras.layers.Dense(dim, activation="relu")(x)
        x = tf.keras.layers.Dropout(0.1)(x)
    outputs = tf.keras.layers.Dense(CLASSES, activation="softmax")(x)
    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-2),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

In [125]:
model = transformer_test()
model.summary()
with strategy.scope():
    model.fit(train_dataset, epochs=10, validation_data=validation_dataset)

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 256)]        0           []                               
                                                                                                  
 embedding_5 (Embedding)        (None, 256, 128)     2560128     ['input_6[0][0]']                
                                                                                                  
 layer_normalization_14 (LayerN  (None, 256, 128)    256         ['embedding_5[0][0]']            
 ormalization)                                                                                    
                                                                                                  
 multi_head_attention_7 (MultiH  (None, 256, 128)    33088       ['layer_normalization_14[0]

ResourceExhaustedError: Graph execution error:

Detected at node 'model_5/dropout_19/dropout/Mul' defined at (most recent call last):
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\IPython\core\interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\IPython\core\interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\IPython\core\interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\IPython\core\interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\juanm\AppData\Local\Temp\ipykernel_45800\3957619528.py", line 4, in <cell line: 3>
      model.fit(train_dataset, epochs=10, validation_data=validation_dataset)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\layers\regularization\dropout.py", line 116, in call
      output = control_flow_util.smart_cond(
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\utils\control_flow_util.py", line 108, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\layers\regularization\dropout.py", line 112, in dropped_inputs
      return self._random_generator.dropout(
    File "C:\Users\juanm\.conda\envs\DeepLearning4IABD\lib\site-packages\keras\backend.py", line 2162, in dropout
      return tf.nn.dropout(
Node: 'model_5/dropout_19/dropout/Mul'
failed to allocate memory
	 [[{{node model_5/dropout_19/dropout/Mul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_97170]

**Utilitary For Monitoring**

In [59]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [39]:
def tensorboard_logs(model_name: str) -> tf.keras.callbacks.TensorBoard:
    return tf.keras.callbacks.TensorBoard(f"{globals()[model_name.upper()]}"
                                          f"_BS_{BATCH_SIZE}"
                                          f"_MAXFEAT_{VOCAB_SIZE}"
                                          f"_EMBEDDING_{EMBEDDING_DIMS}"
                                          f"_SEQLEN_{SEQUENCE_LENGTH}"
                                          f"_EPOCHS_{EPOCHS}"
                                          f"_TRIALS_{TRIALS}")

**Training & Hyperparameter Optimization**

In [60]:
def optimizer_choice(model: tf.keras.Sequential, model_name: str, optimizer: str):
    if optimizer == "RandomSearch":
        tuner = kt.RandomSearch(model,
                                objective=kt.Objective('val_accuracy', direction='max'),
                                max_trials=TRIALS,
                                overwrite=True,
                                project_name=f"{OUTPUT_PATH}\\{model_name}_tuner",
                                directory=f"{KERAS_TUNER_MONITOR_PATH}_{model_name}")
    elif optimizer == "BayesianOptimization":
        tuner = kt.BayesianOptimization(model,
                                        objective=kt.Objective('val_accuracy', direction='max'),
                                        max_trials=TRIALS,
                                        overwrite=True,
                                        project_name=f"{OUTPUT_PATH}\\{model_name}_tuner",
                                        directory=f"{KERAS_TUNER_MONITOR_PATH}_{model_name}")
    elif optimizer == "Hyperband":
        tuner = kt.Hyperband(model,
                             objective=kt.Objective('val_accuracy', direction='max'),
                             max_epochs=TRIALS,
                             overwrite=True,
                             project_name=f"{OUTPUT_PATH}\\{model_name}_tuner",
                             logger=kt_logger.TensorBoardLogger(metrics=['val_accuracy'],
                                                                logdir=f"{KERAS_TUNER_MONITOR_PATH}_{model_name}"))
    else:
        raise ValueError("optimizer_choice must be 0, 1 or 2")

    return tuner

In [61]:
def hp_optimization_and_training(model: Any, optimizer: str = "Hyperband") -> NoReturn:
    model_name = model.__name__
    with strategy.scope():
        tuner = optimizer_choice(model, model_name, optimizer=optimizer)

        # Search for best hyperparameters
        tuner.search(train_dataset,
                     epochs=EPOCHS,
                     validation_data=validation_dataset,
                     callbacks=[stop_early,
                                tensorboard_logs(model_name)])
        # Get the optimal hyperparameters
        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        print(best_hps)

        # Build model with optimal hyperparameters
        model = tuner.hypermodel.build(best_hps)
        history = model.fit(train_dataset,
                            epochs=EPOCHS,
                            validation_data=validation_dataset,
                            callbacks=[stop_early,
                                       tensorboard_logs(model_name)])
        val_acc_per_epoch = history.history['val_accuracy']
        best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
        print(f"best_epoch : {best_epoch}")

        hypermodel = tuner.hypermodel.build(best_hps)
        # Retrain the model with epoch with highest accuracy value
        hypermodel.fit(train_dataset,
                       epochs=best_epoch,
                       callbacks=[stop_early,
                                  tensorboard_logs(model_name)])

        eval_result = hypermodel.evaluate(validation_dataset)

        hypermodel.save(f"{OUTPUT_PATH}\\"
                        f"{model_name}"
                        f"_loss_{eval_result[0]}"
                        f"_acc_{eval_result[1]}"
                        f"_best_epoch_{best_epoch}")

**Evaluation**

In [None]:
models = [dir for root, dirs, files in os.walk(f'{KAGGLE_PATH}/working') for dir in dirs if "acc" in dir]
sort_models_per_acc = sorted(models, key=lambda x: float(x[x.find('_acc_') + 5:]), reverse=True)
sort_models_per_loss = sorted(models, key=lambda x: float(x[x.find('_loss_') + 6:x.find('_acc_')]))
print(sort_models_per_acc)
print(sort_models_per_loss)

In [None]:
best_model = tf.keras.models.load_model(f"{KAGGLE_PATH}/working/{sort_models_per_acc[0]}")

**Submission**

In [None]:
submission = pd.DataFrame()
submission['review_id'] = [data.decode("utf-8") for data in test_dataset['review_id']]
submission['rating'] = best_model.predict(test_dataset)

In [None]:
submission.to_csv(SUBMISSION_PATH, index=False)
print(f"Submission registered at {SUBMISSION_PATH}")