**Load Libs**

In [271]:
# Native python libs
import os
import math
from functools import lru_cache
from datetime import datetime
from typing import Any, Union, NoReturn

In [272]:
# pip installed libs
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
import sklearn
import kerastuner_tensorboard_logger as kt_logger

**Paths**

In [273]:
BASE_PATH = f"{os.path.abspath('')}"

**Kaggle**

In [274]:
KAGGLE = False

In [275]:
KAGGLE_PATH = "/kaggle" if KAGGLE else f"{BASE_PATH}\\kaggle"

In [276]:
def submission_path_exists() -> str:
    directory = f"{KAGGLE_PATH}\\working\\{datetime.now().strftime('%d%m%Y')}"
    if not os.path.exists(directory):
        os.mkdir(directory)
        print(f"Created new output directory for today at '{directory}'")
    return directory

In [277]:
INPUT_PATH = f"{KAGGLE_PATH}\\input\\goodreads-books-reviews-290312"
OUTPUT_PATH = submission_path_exists()
SUBMISSION_PATH = f"{OUTPUT_PATH}\\{datetime.now().strftime('%H%M%S')}_submission.csv"

**Tensorboard & General Monitoring**

In [278]:
TENSORBOARD_LOGS_PATH = f"{BASE_PATH}\\tensorboard_logs"
KERAS_TUNER_MONITOR_PATH = f"{OUTPUT_PATH}\\keras_tuner_monitoring"
MONITOR_PATH = f"{OUTPUT_PATH}\\monitoring.csv"

In [279]:
# Machine Learning tensorboard paths
TENSORBOARD_LOGS_PATH_ML = f"{TENSORBOARD_LOGS_PATH}\\ML"
LINEAR = f"{TENSORBOARD_LOGS_PATH_ML}\\Linear"
MLP = f"{TENSORBOARD_LOGS_PATH_ML}\\MLP"

In [280]:
# Deep Learning tensorboard paths
TENSORBOARD_LOGS_PATH_DL = f"{TENSORBOARD_LOGS_PATH}\\DL"
CNN = f"{TENSORBOARD_LOGS_PATH_DL}\\CNN"
RESNET = f"{TENSORBOARD_LOGS_PATH_DL}\\ResNet"
RNN = f"{TENSORBOARD_LOGS_PATH_DL}\\RNN"
TRANSFORMER = f"{TENSORBOARD_LOGS_PATH_DL}\\Transformer"

In [281]:
# Test if path is good
os.path.abspath(TENSORBOARD_LOGS_PATH)

'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\tensorboard_logs'

**Hyperparameters**

In [282]:
# Fix
CLASSES = 6

In [283]:
# Adjustable
BATCH_SIZE = 1024  # Big batch size, small learning rate
VOCAB_SIZE = 20000
SEQUENCE_LENGTH = 256
EMBEDDING_DIMS = 128
EPOCHS = 100

**Load Datasets**

In [284]:
% % time
train_dataset = pd.read_csv(f"{INPUT_PATH}\\goodreads_train.csv",
                            usecols=['review_text', 'rating'])

CPU times: total: 2.55 s
Wall time: 6.37 s


In [285]:
test_dataset = pd.read_csv(f"{INPUT_PATH}\\goodreads_test.csv",
                           usecols=['review_text'])

**GPU/TPU MultiThreading Setup**

In [286]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    strategy = tf.distribute.experimental.TPUStrategy
except ValueError:
    strategy = tf.distribute.get_strategy()
    print('Number of replicas:', strategy.num_replicas_in_sync)

Number of replicas: 1


In [287]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    tpu = None
    gpus = tf.config.experimental.list_logical_devices("GPU")

In [288]:
if tpu:
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu, )
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    strategy = tf.distribute.MultiWorkerMirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()
    print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)

Running on single GPU  /device:GPU:0
Number of accelerators:  1


**NLP**

In [289]:
# Create a TextVectorization layer
vectorize_layer = tf.keras.layers.TextVectorization(standardize=None,
                                                    output_sequence_length=SEQUENCE_LENGTH,
                                                    output_mode='int')

In [290]:
% % time
with strategy.scope():
    vectorize_layer.adapt(train_dataset['review_text'], batch_size=BATCH_SIZE * strategy.num_replicas_in_sync)

CPU times: total: 16.5 s
Wall time: 46.8 s


In [291]:
vectorize_layer.get_vocabulary()

['',
 '[UNK]',
 'the',
 'and',
 'I',
 'to',
 'a',
 'of',
 'is',
 'was',
 'in',
 'that',
 'it',
 'this',
 'for',
 'but',
 'with',
 'book',
 'her',
 'as',
 'The',
 'so',
 'not',
 'she',
 'have',
 'be',
 'on',
 'you',
 'like',
 'just',
 'my',
 'about',
 'are',
 'really',
 'he',
 'me',
 'at',
 'his',
 'read',
 'all',
 'one',
 'more',
 'from',
 'they',
 'what',
 'an',
 'story',
 'love',
 'has',
 'had',
 'how',
 'It',
 'This',
 'who',
 'because',
 'out',
 'up',
 'or',
 'by',
 'when',
 'were',
 "I'm",
 'would',
 'their',
 'much',
 'some',
 'get',
 "didn't",
 'very',
 'if',
 '-',
 'there',
 'characters',
 'will',
 'into',
 'can',
 "it's",
 'even',
 'think',
 'first',
 'And',
 "don't",
 'than',
 'But',
 'know',
 'book.',
 'also',
 'been',
 'it.',
 'other',
 'loved',
 'only',
 'good',
 'reading',
 'time',
 'see',
 'did',
 'way',
 'we',
 'him',
 'could',
 'them',
 'no',
 'which',
 'little',
 'do',
 'still',
 'going',
 'being',
 'books',
 'She',
 'things',
 'too',
 'felt',
 "It's",
 'made',
 'feel

In [292]:
def vectorize_text(text: Any, label: Any) -> Any:
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text), label

**Creating Dataset For Models**

In [293]:
train_dataset, validation_dataset = sklearn.model_selection.train_test_split(train_dataset, test_size=0.2)

In [294]:
def dataset_from_raw_data(x: np.ndarray, y: np.ndarray, batch_size: int = BATCH_SIZE) -> Any:
    # Create dataset
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    # Vectorize
    dataset = dataset.map(vectorize_text)
    print(dataset.element_spec)
    return dataset

In [295]:
train_dataset = dataset_from_raw_data(train_dataset['review_text'], train_dataset['rating'])

(TensorSpec(shape=(None, 256), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))


In [296]:
validation_dataset = dataset_from_raw_data(validation_dataset['review_text'], validation_dataset['rating'])

(TensorSpec(shape=(None, 256), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))


**Linear**

In [297]:
def linear(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(CLASSES, activation='sigmoid'),
    ])

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**MLP**

In [319]:
def mlp(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    model.add(tf.keras.layers.GlobalAveragePooling1D())
    hp_units_0 = hp.Int('units_0', min_value=32, max_value=512, step=32)
    model.add(tf.keras.layers.Dense(units=hp_units_0, activation='relu'))
    hp_units_1 = hp.Int('units_1', min_value=32, max_value=512, step=32)
    model.add(tf.keras.layers.Dense(units=hp_units_1, activation='relu'))
    model.add(tf.keras.layers.Dense(CLASSES, activation='sigmoid'))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**CNN**

In [299]:
def cnn(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(SEQUENCE_LENGTH,), dtype=tf.int32))
    model.add(tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    model.add(tf.keras.layers.Reshape((math.isqrt(SEQUENCE_LENGTH), math.isqrt(SEQUENCE_LENGTH), -1),
                                      input_shape=(None, SEQUENCE_LENGTH)))

    # Conv & pooling tf.keras.layers
    hp_filters_0 = hp.Int('filters_0', min_value=8, max_value=32, step=8)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_0, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_0, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())
    hp_filters_1 = hp.Int('filters_1', min_value=16, max_value=64, step=16)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_1, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_1, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())
    hp_filters_2 = hp.Int('filters_2', min_value=32, max_value=128, step=32)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_2, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_2, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())
    hp_filters_3 = hp.Int('filters_3', min_value=64, max_value=256, step=64)
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_3, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.Conv2D(filters=hp_filters_3, kernel_size=(3, 3), activation='tanh', padding='same'))
    model.add(tf.keras.layers.MaxPool2D())

    # Fully connected tf.keras.layers
    model.add(tf.keras.layers.Flatten())
    hp_units_0 = hp.Int('units_0', min_value=64, max_value=256, step=64)
    model.add(tf.keras.layers.Dense(units=hp_units_0, activation='relu'))
    hp_units_1 = hp.Int('units_1', min_value=32, max_value=128, step=32)
    model.add(tf.keras.layers.Dense(units=hp_units_1, activation='relu'))
    hp_units_2 = hp.Int('units_2', min_value=16, max_value=64, step=16)
    model.add(tf.keras.layers.Dense(units=hp_units_2, activation='relu'))
    model.add(tf.keras.layers.Dense(CLASSES, activation='softmax'))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**ResNet**

In [None]:
def residual_module(data,
                    filters,
                    stride,
                    reduce=False,
                    reg=0.0001,
                    bn_eps=2e-5,
                    bn_momentum=0.9):
    bn_1 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(data)
    act_1 = tf.keras.layers.ReLU()(bn_1)
    conv_1 = tf.keras.layers.Conv2D(filters=int(filters / 4.),
                                    kernel_size=(1, 1),
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_1)
    bn_2 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(conv_1)
    act_2 = tf.keras.layers.ReLU()(bn_2)
    conv_2 = tf.keras.layers.Conv2D(filters=int(filters / 4.),
                                    kernel_size=(3, 3),
                                    strides=stride,
                                    padding='same',
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_2)
    bn_3 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(conv_2)
    act_3 = tf.keras.layers.ReLU()(bn_3)
    conv_3 = tf.keras.layers.Conv2D(filters=filters,
                                    kernel_size=(1, 1),
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_3)

    if reduce:
        shortcut = tf.keras.layers.Conv2D(filters=filters,
                                          kernel_size=(1, 1),
                                          strides=stride,
                                          use_bias=False,
                                          kernel_regularizer=tf.keras.regularizers.l2(reg))(act_1)

    x = tf.keras.layers.Add()([conv_3, shortcut])
    return x

In [None]:
def build_resnet(stages,
                 filters,
                 reg,
                 bn_eps,
                 bn_momentum):
    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,))
    x = inputs
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(x)
    x = tf.keras.layers.Reshape((SEQUENCE_LENGTH, 1))(x)
    x = tf.keras.layers.BatchNormalization(axis=-1,
                                           epsilon=bn_eps,
                                           momentum=bn_momentum)(inputs)
    x = tf.keras.layers.Conv2D(filters[0], (3, 3),
                               use_bias=False,
                               padding='same',
                               kernel_regularizer=tf.keras.regularizers.l2(reg))(x)
    for i in range(len(stages)):
        stride = (1, 1) if i == 0 else (2, 2)
        x = residual_module(data=x, filters=filters[i + 1], stride=stride,
                            reduce=True, bn_eps=bn_eps, bn_momentum=bn_momentum)
        for j in range(stages[i] - 1):
            x = residual_module(data=x,
                                filters=filters[i + 1],
                                stride=(1, 1),
                                bn_eps=bn_eps,
                                bn_momentum=bn_momentum)
    x = tf.keras.layers.BatchNormalization(axis=-1,
                                           epsilon=bn_eps,
                                           momentum=bn_momentum)(x)
    x = tf.keras.layers.ReLU()(x)
    x = tf.keras.layers.AveragePooling2D((8, 8))(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(CLASSES, activation="softmax", kernel_regularizer=tf.keras.regularizers.l2(reg))(x)
    return tf.keras.Model(inputs, x)

In [300]:
def resnet(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = build_resnet(stages=hp.Choice('stages', values=[[3, 4, 6, 3], [2, 2, 2, 2], [1, 1, 1, 1]]),
                         filters=hp.Choice('filters', values=[[64, 64, 128, 256, 512], [32, 32, 64, 128, 256]]),
                         reg=hp.Float('reg', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3),
                         bn_eps=hp.Float('bn_eps', min_value=1e-5, max_value=1e-3, sampling='LOG', default=2e-5),
                         bn_momentum=hp.Float('bn_momentum', min_value=0.8, max_value=0.99, sampling='LOG', default=0.9))
    return model

**RNN**

In [301]:
def rnn(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    hp_units_0 = hp.Int('units_0', min_value=32, max_value=512, step=32)
    hp_dropout = hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)
    model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(hp_units_0, dropout=hp_dropout,
                                                                 recurrent_dropout=hp_dropout, return_sequences=True)))
    hp_units_1 = hp.Int('units_1', min_value=32, max_value=512, step=32)
    model.add(tf.keras.layers.Dense(hp_units_1, activation='sigmoid'))
    model.add(tf.keras.layers.Dense(CLASSES, activation='sigmoid'))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**Simpler RNN**

In [302]:
def simple_rnn(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(SEQUENCE_LENGTH,), dtype=tf.int32))
    model.add(tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(tf.keras.layers.SimpleRNN(hp_units))
    model.add(tf.keras.layers.Dense(CLASSES, activation='sigmoid'))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**Transformer**

In [303]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = tf.keras.layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = tf.keras.layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(res)
    x = tf.keras.layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = tf.keras.layers.Dropout(dropout)(x)
    x = tf.keras.layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [304]:
def build_model(head_size: int, num_heads: int, ff_dim: int, num_transformer_blocks: int, mlp_units: list[int], dropout: float, mlp_dropout: float):
    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,))
    x = inputs
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(x)
    x = tf.keras.layers.Reshape((SEQUENCE_LENGTH, 1))(x)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    x = tf.keras.layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = tf.keras.layers.Dense(dim, activation="relu")(x)
        x = tf.keras.layers.Dropout(mlp_dropout)(x)
    outputs = tf.keras.layers.Dense(CLASSES, activation="softmax")(x)
    return tf.keras.Model(inputs, outputs)

In [305]:
def transformer(hp: kt.HyperParameters) -> tf.keras.Sequential:
    model = build_model(head_size=hp.Int('head_size', min_value=32, max_value=512, step=32),
                        num_heads=hp.Int('num_heads', min_value=2, max_value=8, step=1),
                        ff_dim=hp.Int('ff_dim', min_value=32, max_value=512, step=32),
                        num_transformer_blocks=hp.Int('num_transformer_blocks', min_value=1, max_value=4, step=1),
                        mlp_units=[hp.Int('mlp_units', min_value=32, max_value=512, step=32)],
                        mlp_dropout=hp.Float('mlp_dropout', min_value=0.0, max_value=0.5, step=0.1),
                        dropout=hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**Utilitary For Monitoring**

In [306]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [307]:
def tensorboard_logs(model_name: str) -> tf.keras.callbacks.TensorBoard:
    return tf.keras.callbacks.TensorBoard(f"{globals()[model_name.upper()]}"
                                          f"_BS_{BATCH_SIZE}"
                                          f"_MAXFEAT_{VOCAB_SIZE}"
                                          f"_EMBEDDING_{EMBEDDING_DIMS}"
                                          f"_SEQLEN_{SEQUENCE_LENGTH}"
                                          f"_EPOCHS_{EPOCHS}")

**Training & Hyperparameter Optimization**

In [312]:
def optimizer_choice(model: tf.keras.Sequential, model_name: str, optimizer: str):
    if optimizer == "RandomSearch":
        tuner = kt.RandomSearch(model,
                                objective=kt.Objective('val_accuracy', direction='max'),
                                max_trials=EPOCHS,
                                overwrite=True,
                                project_name=f"{OUTPUT_PATH}\\{model_name}_tuner",
                                directory=f"{KERAS_TUNER_MONITOR_PATH}_{model_name}")
    elif optimizer == "BayesianOptimization":
        tuner = kt.BayesianOptimization(model,
                                        objective=kt.Objective('val_accuracy', direction='max'),
                                        max_trials=EPOCHS,
                                        overwrite=True,
                                        project_name=f"{OUTPUT_PATH}\\{model_name}_tuner",
                                        directory=f"{KERAS_TUNER_MONITOR_PATH}_{model_name}")
    elif optimizer == "Hyperband":
        tuner = kt.Hyperband(model,
                             objective=kt.Objective('val_accuracy', direction='max'),
                             max_epochs=EPOCHS,
                             overwrite=True,
                             project_name=f"{OUTPUT_PATH}\\{model_name}_tuner",
                             logger=kt_logger.TensorBoardLogger(metrics=['val_accuracy'],
                                                                logdir=f"{KERAS_TUNER_MONITOR_PATH}_{model_name}"))
    else:
        raise ValueError("optimizer_choice must be 0, 1 or 2")

    return tuner

In [317]:
def hp_optimization_and_training(model: Any, optimizer: str = "Hyperband") -> NoReturn:
    model_name = model.__name__
    with strategy.scope():
        tuner = optimizer_choice(model, model_name, optimizer=optimizer)

        # Search for best hyperparameters
        tuner.search(train_dataset,
                     epochs=EPOCHS,
                     validation_data=validation_dataset,
                     callbacks=[stop_early,
                                tensorboard_logs(model_name)])
        # Get the optimal hyperparameters
        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        print(best_hps)

        # Build model with optimal hyperparameters
        model = tuner.hypermodel.build(best_hps)
        history = model.fit(train_dataset,
                            epochs=EPOCHS,
                            validation_data=validation_dataset,
                            callbacks=[stop_early,
                                       tensorboard_logs(model_name)])
        val_acc_per_epoch = history.history['val_accuracy']
        best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
        print(f"best_epoch : {best_epoch}")

        hypermodel = tuner.hypermodel.build(best_hps)
        # Retrain the model
        hypermodel.fit(train_dataset,
                       epochs=best_epoch,
                       callbacks=[stop_early,
                                  tensorboard_logs(model_name)])

        eval_result = hypermodel.evaluate(validation_dataset)
        for index, column in enumerate(['loss', 'accuracy']):
            print(f"{column} : {eval_result[index]}")

        hypermodel.save(f"{OUTPUT_PATH}\\"
                        f"{model_name}"
                        f"_loss_{eval_result[0]}"
                        f"_acc_{eval_result[1]}")

In [None]:
hp_optimization_and_training(linear)

In [320]:
hp_optimization_and_training(mlp)

Trial 254 Complete [00h 05m 07s]
val_accuracy: 0.5458278059959412

Best val_accuracy So Far: 0.5479888916015625
Total elapsed time: 14h 53m 57s
INFO:tensorflow:Oracle triggered exit
optimal_learning_rate : 0.01


KeyError: 'units does not exist.'

In [None]:
hp_optimization_and_training(cnn)

In [1]:
hp_optimization_and_training(simple_rnn)

NameError: name 'train' is not defined

In [None]:
hp_optimization_and_training(transformer)

In [None]:
# In case of multiple models training
for model in [linear, mlp, cnn, simple_rnn, transformer]:
    hp_optimization_and_training(model)

**Evaluation**

In [188]:
models = [dir for root, dirs, files in os.walk(f'{KAGGLE_PATH}/working') for dir in dirs if dir.__contains__("acc")]
sort_models_per_acc = sorted(models, key=lambda x: float(x[x.find('_acc_') + 5:]), reverse=True)
sort_models_per_loss = sorted(models, key=lambda x: float(x[x.find('_loss_') + 6:x.find('_acc_')]))
print(sort_models_per_acc)
print(sort_models_per_loss)

['linear_loss_1.0978895425796509_acc_0.5394444465637207', 'mlp_loss_1.073839545249939_acc_0.5354999899864197', 'linear_loss_1.113369345664978_acc_0.5319499969482422', 'cnn_loss_1.1353473663330078_acc_0.5252388715744019']
['mlp_loss_1.073839545249939_acc_0.5354999899864197', 'linear_loss_1.0978895425796509_acc_0.5394444465637207', 'linear_loss_1.113369345664978_acc_0.5319499969482422', 'cnn_loss_1.1353473663330078_acc_0.5252388715744019']


In [None]:
best_model = tf.keras.models.load_model(f"{KAGGLE_PATH}/working/{sort_models_per_acc[0]}")

**Submission**

In [None]:
submission = pd.DataFrame()
submission['review_id'] = [data.decode("utf-8") for data in test_dataset['review_id']]
submission['rating'] = best_model.predict(test_dataset)

In [59]:
submission.to_csv(SUBMISSION_PATH, index=False)
print(f"Submission registered at {SUBMISSION_PATH}")

NameError: name 'submission' is not defined