**Load Libraries**

In [332]:
# Native python libs
import os
import math
from datetime import datetime
from typing import Any, NoReturn

In [333]:
# pip installed libs
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
import sklearn
import sklearn.model_selection
import kerastuner_tensorboard_logger as kt_logger

**Paths**

In [334]:
# Absolute path of the directory the notebook is executed from.
BASE_PATH = os.path.abspath('')

**Kaggle**

In [335]:
# Switch between a Kaggle kernel (True -> "/kaggle" root) and a local checkout (False).
KAGGLE = False

In [336]:
# Root of the Kaggle-style directory tree: the "/kaggle" mount when KAGGLE is set,
# otherwise a "kaggle" folder next to the notebook. os.path.join keeps the local
# branch valid on any OS instead of hard-coding the Windows separator.
KAGGLE_PATH = "/kaggle" if KAGGLE else os.path.join(BASE_PATH, "kaggle")

In [337]:
def submission_path_exists() -> str:
    """Return today's output directory under ``KAGGLE_PATH/working``, creating it if needed.

    The directory is named after the current date formatted as ``%d%m%Y``.

    Returns:
        Path of the dated output directory, which exists once this returns.
    """
    directory = os.path.join(KAGGLE_PATH, "working", datetime.now().strftime('%d%m%Y'))
    if not os.path.exists(directory):
        # makedirs also creates a missing "working" parent (os.mkdir would raise),
        # and exist_ok tolerates the directory appearing between check and creation.
        os.makedirs(directory, exist_ok=True)
        print(f"Created new output directory for today at '{directory}'")
    return directory

In [338]:
# Competition input data, today's output directory, and a time-stamped submission file.
# Paths are assembled with os.path.join so they are valid on Windows and on Kaggle (Linux).
INPUT_PATH = os.path.join(KAGGLE_PATH, "input", "goodreads-books-reviews-290312")
OUTPUT_PATH = submission_path_exists()
SUBMISSION_PATH = os.path.join(OUTPUT_PATH, f"{datetime.now().strftime('%H%M%S')}_submission.csv")

**Tensorboard & General Monitoring**

In [339]:
# Monitoring locations; joined with os.path.join instead of a hard-coded "\\" separator.
TENSORBOARD_LOGS_PATH = os.path.join(BASE_PATH, "tensorboard_logs")
KERAS_TUNER_MONITOR_PATH = os.path.join(OUTPUT_PATH, "keras_tuner_monitoring")
MONITOR_PATH = os.path.join(OUTPUT_PATH, "monitoring.csv")

In [340]:
# Machine Learning tensorboard paths.
# LINEAR / MLP are looked up by upper-cased model name when log paths are built,
# so these constant names must match the model builder function names.
TENSORBOARD_LOGS_PATH_ML = os.path.join(TENSORBOARD_LOGS_PATH, "ML")
LINEAR = os.path.join(TENSORBOARD_LOGS_PATH_ML, "Linear")
MLP = os.path.join(TENSORBOARD_LOGS_PATH_ML, "MLP")

In [341]:
# Deep Learning tensorboard paths.
# As with the ML paths, each constant is resolved from the upper-cased model name
# when log paths are built, so the names must match the model builder functions.
TENSORBOARD_LOGS_PATH_DL = os.path.join(TENSORBOARD_LOGS_PATH, "DL")
CNN = os.path.join(TENSORBOARD_LOGS_PATH_DL, "CNN")
RESNET = os.path.join(TENSORBOARD_LOGS_PATH_DL, "ResNet")
RNN = os.path.join(TENSORBOARD_LOGS_PATH_DL, "RNN")
SIMPLE_RNN = os.path.join(TENSORBOARD_LOGS_PATH_DL, "SimpleRNN")
TRANSFORMER = os.path.join(TENSORBOARD_LOGS_PATH_DL, "Transformer")

In [342]:
# Sanity check: display the absolute form of the TensorBoard log directory.
os.path.abspath(TENSORBOARD_LOGS_PATH)

'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\tensorboard_logs'

**GPU/TPU Distribution Strategy Setup**

In [343]:
# Use a TPU when one can be resolved; otherwise fall back to TensorFlow's current strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    # The strategy must be instantiated with the resolver: the bare class object has
    # no replica count and cannot open a scope.
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
    # The resolver raised, which this notebook treats as "no TPU available".
    strategy = tf.distribute.get_strategy()
# Report the replica count for whichever strategy was selected (TPU or fallback).
print('Number of replicas:', strategy.num_replicas_in_sync)

Number of replicas: 1


In [344]:
# Detect the available accelerator. `gpus` is bound on both paths so later cells can
# always inspect it, even when a TPU is found.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    gpus = []
except ValueError:
    tpu = None
    gpus = tf.config.experimental.list_logical_devices("GPU")

In [345]:
# Pick the distribution strategy matching the detected hardware.
if tpu:
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    # Several GPUs on one machine: MirroredStrategy replicates the model on every
    # visible GPU. MultiWorkerMirroredStrategy targets multi-host clusters and does
    # not accept a list of device names.
    strategy = tf.distribute.MirroredStrategy()
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()
    print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)

Running on single GPU  /device:GPU:0
Number of accelerators:  1


**Hyperparameters**

In [346]:
# Fixed: size of the models' output layer. The submission counts printed at the end of
# the notebook show ratings 0 through 5, i.e. six classes.
CLASSES = 6

In [347]:
# Adjustable
BATCH_SIZE = 1024  # Big batch size, small learning rate
VOCAB_SIZE = 20000  # max_tokens of the TextVectorization layer
SEQUENCE_LENGTH = 256  # every review is padded/truncated to this many tokens
EMBEDDING_DIMS = 128  # output width of each model's Embedding layer
EPOCHS = 100  # upper bound on epochs per fit (early stopping may end sooner)
TRIALS = 100  # hyperparameter search budget handed to the tuner

**Load Datasets**

In [348]:
# Labelled reviews: only the text and its rating are needed.
# os.path.join keeps the path valid on both Windows and Kaggle (Linux).
train_dataset = pd.read_csv(os.path.join(INPUT_PATH, "goodreads_train.csv"),
                            usecols=['review_text', 'rating'])

In [349]:
# Unlabelled reviews to predict; review_id is kept to build the submission file.
# os.path.join keeps the path valid on both Windows and Kaggle (Linux).
test_dataset = pd.read_csv(os.path.join(INPUT_PATH, "goodreads_test.csv"),
                           usecols=['review_id', 'review_text'])

**NLP**

In [350]:
# Create a TextVectorization layer that maps raw review text to integer token ids.
# standardize=None leaves the text untouched before tokenising (the vocabulary shown
# further down still contains the capitalised token 'I').
# Every review comes out with exactly SEQUENCE_LENGTH ids.
vectorize_layer = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE,
                                                    standardize=None,
                                                    output_sequence_length=SEQUENCE_LENGTH,
                                                    output_mode='int')

In [351]:
%%time
# Learn the vocabulary from the review text of the training CSV. At this point
# train_dataset is still the full DataFrame; the train/validation split happens later.
with strategy.scope():
    vectorize_layer.adapt(train_dataset['review_text'], batch_size=BATCH_SIZE * strategy.num_replicas_in_sync)

CPU times: total: 16.3 s
Wall time: 51.8 s


In [352]:
# Peek at learned vocabulary entries 2-9; the first two entries are skipped
# (presumably the reserved padding / out-of-vocabulary tokens - confirm in the Keras docs).
vectorize_layer.get_vocabulary()[2:10]

['the', 'and', 'I', 'to', 'a', 'of', 'is', 'was']

In [353]:
def vectorize_text(text: Any, label: Any) -> Any:
    """Turn raw text into token ids, passing the label through untouched.

    A trailing axis is appended to ``text`` before it is fed to the module-level
    ``vectorize_layer``.

    Returns:
        A ``(token_ids, label)`` tuple.
    """
    text_column = tf.expand_dims(text, -1)
    token_ids = vectorize_layer(text_column)
    return token_ids, label

**Creating Dataset For Models**

In [354]:
# Hold out 20% of the labelled reviews for validation. The fixed seed keeps the split,
# and therefore every validation metric compared later, reproducible across kernel restarts.
train_dataset, validation_dataset = sklearn.model_selection.train_test_split(train_dataset,
                                                                             test_size=0.2,
                                                                             random_state=42)

In [355]:
# Keep the ids aside: test_dataset is rebound to a tf.data.Dataset further down,
# and the ids are needed again to assemble the submission.
review_ids = test_dataset['review_id']

In [356]:
def dataset_from_raw_data(x: np.ndarray, y: np.ndarray, batch_size: int = BATCH_SIZE) -> Any:
    """Build a batched, vectorized ``tf.data.Dataset`` from texts and labels.

    Args:
        x: Raw review texts.
        y: Labels aligned with ``x``.
        batch_size: Number of examples per batch.

    Returns:
        The dataset after batching and mapping ``vectorize_text`` over each batch;
        its ``element_spec`` is printed as a side effect.
    """
    batched = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    # Text -> token ids, applied per batch.
    vectorized = batched.map(vectorize_text)
    print(vectorized.element_spec)
    return vectorized

In [357]:
# NOTE: rebinds train_dataset from a DataFrame to a tf.data.Dataset, so this cell
# cannot be re-run without first re-running the load and split cells.
train_dataset = dataset_from_raw_data(train_dataset['review_text'], train_dataset['rating'])

(TensorSpec(shape=(None, 256), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))


In [358]:
# NOTE: rebinds validation_dataset from a DataFrame to a tf.data.Dataset (not re-runnable on its own).
validation_dataset = dataset_from_raw_data(validation_dataset['review_text'], validation_dataset['rating'])

(TensorSpec(shape=(None, 256), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))


In [359]:
# The test set has no ratings: random integers in [0, 5) are passed only to fill the
# label slot that dataset_from_raw_data expects.
# NOTE: rebinds test_dataset from a DataFrame to a tf.data.Dataset (not re-runnable on its own).
test_dataset = dataset_from_raw_data(test_dataset['review_text'],
                                     np.random.randint(0, 5, len(test_dataset['review_text'])))

(TensorSpec(shape=(None, 256), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))


**Linear**

In [360]:
def linear(hp: kt.HyperParameters) -> tf.keras.Sequential:
    """Build the linear baseline: embedding -> average pooling -> one dense output layer.

    Args:
        hp: KerasTuner hyperparameter container; registers ``learning_rate``.

    Returns:
        A compiled ``tf.keras.Sequential`` model with a ``CLASSES``-way softmax output.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS),
        tf.keras.layers.GlobalAveragePooling1D(),
        # softmax (not sigmoid): SparseCategoricalCrossentropy with its default
        # from_logits=False expects a probability distribution over the classes,
        # and this matches the output layer of the other models in this notebook.
        tf.keras.layers.Dense(CLASSES, activation='softmax'),
    ])

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

**MLP**

In [361]:
def mlp(hp: kt.HyperParameters) -> tf.keras.Sequential:
    """Build a two-hidden-layer perceptron on top of averaged token embeddings.

    Args:
        hp: KerasTuner hyperparameter container; registers ``units_0``, ``units_1``
            and ``learning_rate``.

    Returns:
        A compiled ``tf.keras.Sequential`` model with a ``CLASSES``-way softmax output.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    model.add(tf.keras.layers.GlobalAveragePooling1D())
    model.add(tf.keras.layers.Dense(units=hp.Int('units_0', min_value=32, max_value=512, step=8),
                                    activation='relu'))
    model.add(tf.keras.layers.Dense(units=hp.Int('units_1', min_value=32, max_value=512, step=8),
                                    activation='relu'))
    # softmax (not sigmoid): SparseCategoricalCrossentropy with its default
    # from_logits=False expects a probability distribution over the classes,
    # and this matches the output layer of the other models in this notebook.
    model.add(tf.keras.layers.Dense(CLASSES, activation='softmax'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**CNN**

In [362]:
def cnn(hp: kt.HyperParameters) -> tf.keras.Sequential:
    """Build a 2-D convolutional classifier over the embedded review.

    The embedded sequence is reshaped into a square grid whose side is
    ``isqrt(SEQUENCE_LENGTH)``. Four stages of two 3x3 tanh convolutions plus
    max-pooling follow, then three relu dense layers and a softmax output.

    Args:
        hp: KerasTuner hyperparameter container; registers ``filters_0..3``,
            ``units_0..2`` and ``learning_rate``.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    grid_side = math.isqrt(SEQUENCE_LENGTH)

    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(SEQUENCE_LENGTH,), dtype=tf.int32))
    model.add(layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS))
    model.add(layers.Reshape((grid_side, grid_side, -1),
                             input_shape=(None, SEQUENCE_LENGTH)))

    # Conv & pooling stages: (min, max) of the tunable filter count for each stage.
    filter_ranges = [(8, 32), (16, 64), (32, 128), (64, 256)]
    for stage, (low, high) in enumerate(filter_ranges):
        n_filters = hp.Int(f'filters_{stage}', min_value=low, max_value=high, step=8)
        for _ in range(2):
            model.add(layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='tanh', padding='same'))
        model.add(layers.MaxPool2D())

    # Fully connected head: (min, max) of the tunable width for each dense layer.
    model.add(layers.Flatten())
    unit_ranges = [(64, 256), (32, 128), (16, 64)]
    for idx, (low, high) in enumerate(unit_ranges):
        n_units = hp.Int(f'units_{idx}', min_value=low, max_value=high, step=8)
        model.add(layers.Dense(units=n_units, activation='relu'))
    model.add(layers.Dense(CLASSES, activation='softmax'))

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**ResNet**

In [363]:
def residual_module(data,
                    filters,
                    stride,
                    reduce=False,
                    reg=1e-3,
                    bn_eps=1e-3,
                    bn_momentum=0.99):
    """Bottleneck residual block: three (BatchNorm -> ReLU -> Conv2D) steps plus a shortcut.

    The main branch is a 1x1 convolution with ``filters / 4`` channels, a 3x3
    convolution with ``filters / 4`` channels (carrying the stride), and a 1x1
    convolution with ``filters`` channels. Its output is added to the shortcut.

    Args:
        data: Input tensor (4-D, channels last, as required by ``Conv2D``).
        filters: Number of output channels of the block.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
        reduce: When true, the shortcut is a strided 1x1 convolution of the first
            activation. Use this whenever the block changes the spatial size or the
            channel count. When false, the shortcut is ``data`` itself, so ``data``
            must already have ``filters`` channels and ``stride`` must be 1.
        reg: L2 regularisation factor applied to every convolution kernel.
        bn_eps: ``epsilon`` of the batch-normalisation layers.
        bn_momentum: ``momentum`` of the batch-normalisation layers.

    Returns:
        The output tensor of the block.
    """
    # Identity shortcut by default: the block input itself (a bare 0 is not a tensor
    # and cannot be fed to the Add layer).
    shortcut = data
    bn_1 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(data)
    act_1 = tf.keras.layers.ReLU()(bn_1)
    conv_1 = tf.keras.layers.Conv2D(filters=int(filters / 4.),
                                    kernel_size=(1, 1),
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_1)
    bn_2 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(conv_1)
    act_2 = tf.keras.layers.ReLU()(bn_2)
    conv_2 = tf.keras.layers.Conv2D(filters=int(filters / 4.),
                                    kernel_size=(3, 3),
                                    strides=stride,
                                    padding='same',
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_2)
    bn_3 = tf.keras.layers.BatchNormalization(axis=-1,
                                              epsilon=bn_eps,
                                              momentum=bn_momentum)(conv_2)
    act_3 = tf.keras.layers.ReLU()(bn_3)
    conv_3 = tf.keras.layers.Conv2D(filters=filters,
                                    kernel_size=(1, 1),
                                    use_bias=False,
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(act_3)

    if reduce:
        # Projection shortcut: brings the input to the main branch's shape.
        shortcut = tf.keras.layers.Conv2D(filters=filters,
                                          kernel_size=(1, 1),
                                          strides=stride,
                                          use_bias=False,
                                          kernel_regularizer=tf.keras.regularizers.l2(reg))(act_1)

    x = tf.keras.layers.Add()([conv_3, shortcut])
    return x

In [364]:
def resnet(hp: kt.HyperParameters) -> tf.keras.Model:
    """Build a ResNet-style classifier over the embedded review, laid out as a 2-D grid.

    The embedded sequence is reshaped into a square grid whose side is
    ``isqrt(SEQUENCE_LENGTH)``, the same layout the CNN model uses. It then passes
    through a 3x3 stem convolution and four stages of ``residual_module`` blocks.
    The first block of each stage uses a projection shortcut, and every stage after
    the first halves the spatial size.

    Args:
        hp: KerasTuner hyperparameter container; registers ``bn_eps``, ``momentum``,
            ``reg`` and ``learning_rate``.

    Returns:
        The compiled functional ``tf.keras.Model``.
    """
    filters = [64, 64, 128, 256, 512]
    stages = [3, 4, 6, 3]
    bn_eps = hp.Float('bn_eps', min_value=1e-8, max_value=1e-2, sampling='LOG')
    # The original bounds were inverted (min_value above max_value) and several orders
    # of magnitude below any useful Keras momentum. Keras momentum is the weight of the
    # running statistics, so the search range sits close to 1.
    bn_momentum = hp.Float('momentum', min_value=0.9, max_value=0.999)
    reg = hp.Float('reg', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)
    grid_side = math.isqrt(SEQUENCE_LENGTH)

    inputs = tf.keras.Input(shape=(SEQUENCE_LENGTH,))
    x = tf.keras.layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(inputs)
    # Conv2D needs a 4-D tensor: fold the sequence axis into a square grid.
    x = tf.keras.layers.Reshape((grid_side, grid_side, -1))(x)
    # Normalise the embedded grid (not the raw token ids, which would discard the embedding).
    x = tf.keras.layers.BatchNormalization(axis=-1,
                                           epsilon=bn_eps,
                                           momentum=bn_momentum)(x)
    x = tf.keras.layers.Conv2D(filters[0], (3, 3),
                               use_bias=False,
                               padding='same',
                               kernel_regularizer=tf.keras.regularizers.l2(reg))(x)
    for i, n_blocks in enumerate(stages):
        stride = (1, 1) if i == 0 else (2, 2)
        # First block of the stage changes the channel count (and possibly the spatial
        # size), so it needs the projection shortcut.
        x = residual_module(data=x, filters=filters[i + 1], stride=stride,
                            reduce=True, reg=reg, bn_eps=bn_eps, bn_momentum=bn_momentum)
        for _ in range(n_blocks - 1):
            x = residual_module(data=x,
                                filters=filters[i + 1],
                                stride=(1, 1),
                                reduce=False,
                                reg=reg,
                                bn_eps=bn_eps,
                                bn_momentum=bn_momentum)
    x = tf.keras.layers.BatchNormalization(axis=-1,
                                           epsilon=bn_eps,
                                           momentum=bn_momentum)(x)
    x = tf.keras.layers.ReLU()(x)
    # Global pooling works for any final feature-map size; a fixed 8x8 window is
    # larger than the map left after three stride-2 stages on this grid.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(CLASSES,
                                    activation="softmax",
                                    kernel_regularizer=tf.keras.regularizers.l2(reg))(x)
    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**RNN**

In [365]:
def rnn(hp: kt.HyperParameters) -> tf.keras.Sequential:
    """Build a stacked bidirectional-LSTM classifier.

    Between zero and three sequence-returning bidirectional LSTM layers (tunable) are
    followed by a final bidirectional LSTM that returns only its last output, one relu
    dense layer and a softmax output.

    Args:
        hp: KerasTuner hyperparameter container; registers ``num_layers``,
            ``units_<i>``, ``lstm_units``, ``dense_units`` and ``learning_rate``.

    Returns:
        The compiled functional Keras model.
    """
    layers = tf.keras.layers

    token_ids = tf.keras.Input(shape=(SEQUENCE_LENGTH,), dtype=tf.int32)
    features = layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(token_ids)

    # Optional stack of sequence-to-sequence recurrent layers.
    n_stacked = hp.Int('num_layers', min_value=0, max_value=3, step=1)
    for layer_idx in range(n_stacked):
        stacked_units = hp.Int('units_' + str(layer_idx), min_value=8, max_value=64, step=8)
        features = layers.Bidirectional(
            layers.LSTM(stacked_units, return_sequences=True))(features)

    # Final recurrent layer collapses the sequence into a single vector.
    last_units = hp.Int('lstm_units', min_value=8, max_value=64, step=8)
    features = layers.Bidirectional(
        layers.LSTM(last_units, return_sequences=False))(features)

    dense_units = hp.Int('dense_units', min_value=8, max_value=64, step=8)
    features = layers.Dense(dense_units, activation='relu')(features)
    probabilities = layers.Dense(CLASSES, activation='softmax')(features)
    model = tf.keras.Model(token_ids, probabilities)

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**Transformer**

In [366]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: self-attention then a position-wise feed-forward part.

    Both sub-blocks normalise their input first and add a residual connection.

    Args:
        inputs: Input tensor; its last dimension sets the block's output width.
        head_size: ``key_dim`` of the multi-head attention layer.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first feed-forward convolution.
        dropout: Dropout rate used inside attention and after each sub-block step.

    Returns:
        A tensor with the same last dimension as ``inputs``.
    """
    layers = tf.keras.layers

    # Self-attention sub-block.
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention = layers.MultiHeadAttention(key_dim=head_size,
                                          num_heads=num_heads,
                                          dropout=dropout)
    attended = attention(normed, normed)
    attended = layers.Dropout(dropout)(attended)
    attention_out = attended + inputs

    # Feed-forward sub-block, implemented with kernel-size-1 convolutions.
    hidden = layers.LayerNormalization(epsilon=1e-6)(attention_out)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + attention_out

In [367]:
def transformer(hp: kt.HyperParameters) -> tf.keras.Sequential:
    """Build a transformer-encoder classifier.

    Token embeddings go through one to three ``transformer_encoder`` blocks (tunable),
    are pooled with ``GlobalAveragePooling1D(data_format="channels_first")``, and then
    pass through one relu dense layer with dropout and a softmax output.

    Args:
        hp: KerasTuner hyperparameter container; registers
            ``num_transformer_blocks``, ``head_size``, ``num_heads``, ``ff_dim``,
            ``dropout``, ``mlp_units``, ``mlp_dropout`` and ``learning_rate``.

    Returns:
        The compiled functional Keras model.
    """
    layers = tf.keras.layers

    token_ids = tf.keras.Input(shape=(SEQUENCE_LENGTH,))
    features = layers.Embedding(VOCAB_SIZE + 1, EMBEDDING_DIMS)(token_ids)

    n_blocks = hp.Int('num_transformer_blocks', min_value=1, max_value=3, step=1)
    for _ in range(n_blocks):
        # The same named hyperparameters are requested for every block, so all blocks
        # share one head size / head count / feed-forward width / dropout.
        features = transformer_encoder(
            features,
            hp.Int('head_size', min_value=8, max_value=64, step=8),
            hp.Int('num_heads', min_value=1, max_value=2, step=1),
            hp.Int('ff_dim', min_value=8, max_value=64, step=8),
            hp.Fixed('dropout', value=0))

    features = layers.GlobalAveragePooling1D(data_format="channels_first")(features)

    # Single-layer MLP head.
    mlp_units = hp.Int('mlp_units', min_value=8, max_value=64, step=8)
    features = layers.Dense(mlp_units, activation="relu")(features)
    mlp_dropout = hp.Float('mlp_dropout', min_value=0, max_value=1e-1, step=1e-1)
    features = layers.Dropout(mlp_dropout)(features)

    probabilities = layers.Dense(CLASSES, activation="softmax")(features)
    model = tf.keras.Model(token_ids, probabilities)

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

**Monitoring Utilities**

In [368]:
# Shared early-stopping callback: monitors val_loss with a patience of 5 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [369]:
def path_exists(path: str) -> str:
    """Return a path that does not exist yet, derived from ``path``.

    Args:
        path: Desired file-system path.

    Returns:
        ``path`` itself when nothing exists there, otherwise the first free
        ``"{path}_{i}"`` with ``i`` counting up from 0.
    """
    if not os.path.exists(path):
        return path

    suffix = 0
    candidate = f"{path}_{suffix}"
    while os.path.exists(candidate):
        suffix += 1
        candidate = f"{path}_{suffix}"
    return candidate

In [370]:
def tensorboard_logs(model_name: str) -> tf.keras.callbacks.TensorBoard:
    """Create a TensorBoard callback logging to a fresh, run-specific directory.

    The base directory is the module-level constant named ``model_name.upper()``.
    The current hyperparameter constants are appended to it, and ``path_exists``
    adds a numeric suffix when the resulting path already exists.

    Args:
        model_name: Name of the model builder (e.g. ``"rnn"``).

    Returns:
        The configured ``TensorBoard`` callback.
    """
    base_dir = globals()[model_name.upper()]
    run_tag = "".join([
        f"_BS_{BATCH_SIZE}",
        f"_MAXFEAT_{VOCAB_SIZE}",
        f"_EMBEDDING_{EMBEDDING_DIMS}",
        f"_SEQLEN_{SEQUENCE_LENGTH}",
        f"_EPOCHS_{EPOCHS}",
        f"_TRIALS_{TRIALS}",
    ])
    log_dir = path_exists(f"{base_dir}{run_tag}")
    return tf.keras.callbacks.TensorBoard(log_dir)

In [371]:
def epochs_logs(model_name: str) -> tf.keras.callbacks.CSVLogger:
    """Create a CSVLogger callback writing per-epoch metrics to a run-specific file.

    The file stem is built the same way as the TensorBoard log directory: the
    module-level constant named ``model_name.upper()`` plus the current hyperparameter
    constants. ``path_exists`` is applied to the stem *without* the ``.csv``
    extension, and ``.csv`` is appended afterwards.

    Args:
        model_name: Name of the model builder (e.g. ``"rnn"``).

    Returns:
        The configured ``CSVLogger`` callback.
    """
    stem = (
        f"{globals()[model_name.upper()]}"
        f"_BS_{BATCH_SIZE}_MAXFEAT_{VOCAB_SIZE}_EMBEDDING_{EMBEDDING_DIMS}"
        f"_SEQLEN_{SEQUENCE_LENGTH}_EPOCHS_{EPOCHS}_TRIALS_{TRIALS}"
    )
    csv_file = f"{path_exists(stem)}.csv"
    return tf.keras.callbacks.CSVLogger(csv_file)

**Training & Hyperparameter Optimization**

In [372]:
def optimizer_choice(model: tf.keras.Sequential, model_name: str, optimizer: str) -> Any:
    """Create the KerasTuner tuner selected by ``optimizer``.

    Args:
        model: Model-building function (takes a ``kt.HyperParameters``) handed to the tuner.
        model_name: Name used in the tuner's project and monitoring paths.
        optimizer: One of ``"RandomSearch"``, ``"BayesianOptimization"`` or ``"Hyperband"``.

    Returns:
        The configured tuner, maximising ``val_accuracy``.

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    # Settings shared by all three tuners.
    # NOTE(review): project_name is built from OUTPUT_PATH, which looks like an absolute
    # path; if so, the `directory` argument below may be ignored - confirm the intended layout.
    shared = dict(objective=kt.Objective('val_accuracy', direction='max'),
                  overwrite=True,
                  project_name=os.path.join(OUTPUT_PATH, f"{model_name}_tuner"))
    monitor_dir = f"{KERAS_TUNER_MONITOR_PATH}_{model_name}"

    if optimizer == "RandomSearch":
        return kt.RandomSearch(model, max_trials=TRIALS, directory=monitor_dir, **shared)
    if optimizer == "BayesianOptimization":
        return kt.BayesianOptimization(model, max_trials=TRIALS, directory=monitor_dir, **shared)
    if optimizer == "Hyperband":
        # Hyperband's max_epochs caps the epochs spent on one model, so it is driven by
        # EPOCHS rather than by the trial budget.
        return kt.Hyperband(model,
                            max_epochs=EPOCHS,
                            logger=kt_logger.TensorBoardLogger(metrics=['val_accuracy'],
                                                               logdir=monitor_dir),
                            **shared)
    raise ValueError("optimizer must be 'RandomSearch', 'BayesianOptimization' or 'Hyperband', "
                     f"got {optimizer!r}")

In [373]:
def hp_optimization_and_training(model: Any, optimizer: str = "Hyperband") -> None:
    """Tune, train and save one model architecture.

    Steps, all run inside ``strategy.scope()``:
      1. search hyperparameters with the tuner returned by ``optimizer_choice``;
      2. train a model built from the best hyperparameters to find the epoch with
         the highest ``val_accuracy``;
      3. rebuild and retrain for exactly that many epochs;
      4. evaluate on the validation set and save the model under ``OUTPUT_PATH``.

    Args:
        model: Model-building function; its ``__name__`` is used for log and output paths.
        optimizer: Tuner name understood by ``optimizer_choice``.

    Returns:
        None. The trained model is written to disk and progress is printed.
    """
    model_name = model.__name__

    def fresh_callbacks() -> list:
        # New CSV / TensorBoard loggers for every phase so each one gets its own
        # log path; the early-stopping callback is shared.
        return [stop_early, epochs_logs(model_name), tensorboard_logs(model_name)]

    with strategy.scope():
        tuner = optimizer_choice(model, model_name, optimizer=optimizer)

        # Search for best hyperparameters
        tuner.search(train_dataset,
                     epochs=EPOCHS,
                     validation_data=validation_dataset,
                     callbacks=fresh_callbacks())
        # Get the optimal hyperparameters
        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        print(best_hps)

        # Build model with optimal hyperparameters
        model = tuner.hypermodel.build(best_hps)
        history = model.fit(train_dataset,
                            epochs=EPOCHS,
                            validation_data=validation_dataset,
                            callbacks=fresh_callbacks())
        val_acc_per_epoch = history.history['val_accuracy']
        # Epochs are 1-based, list indices 0-based.
        best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
        print(f"best_epoch : {best_epoch}")

        hypermodel = tuner.hypermodel.build(best_hps)
        # Retrain the model with epoch with highest val_accuracy value
        hypermodel.fit(train_dataset,
                       epochs=best_epoch,
                       validation_data=validation_dataset,
                       callbacks=fresh_callbacks())

        # evaluate returns [loss, accuracy] for the compiled metrics.
        eval_result = hypermodel.evaluate(validation_dataset)

        # The evaluation cells parse the accuracy back out of this directory name,
        # so the "_acc_" and "_best_" markers must be kept.
        hypermodel.save(os.path.join(OUTPUT_PATH,
                                     f"{model_name}"
                                     f"_loss_{eval_result[0]}"
                                     f"_acc_{eval_result[1]}"
                                     f"_best_epoch_{best_epoch}"))

In [None]:
# Tune, train and save the linear baseline using Bayesian optimisation.
hp_optimization_and_training(linear, optimizer="BayesianOptimization")

In [None]:
# Tune, train and save the MLP using Bayesian optimisation.
hp_optimization_and_training(mlp, optimizer="BayesianOptimization")

In [None]:
# Tune, train and save the CNN using Bayesian optimisation.
hp_optimization_and_training(cnn, optimizer="BayesianOptimization")

In [None]:
# Tune, train and save the ResNet using Bayesian optimisation.
hp_optimization_and_training(resnet, optimizer="BayesianOptimization")

In [374]:
# Tune, train and save the bidirectional-LSTM model using Bayesian optimisation.
hp_optimization_and_training(rnn, optimizer="BayesianOptimization")

Trial 3 Complete [00h 09m 14s]
val_accuracy: 0.5992555618286133

Best val_accuracy So Far: 0.5992555618286133
Total elapsed time: 00h 28m 15s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


<keras_tuner.engine.hyperparameters.hyperparameters.HyperParameters object at 0x000002158D3932B0>
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
best_epoch : 2
Epoch 1/2
Epoch 2/2




INFO:tensorflow:Assets written to: C:\Users\juanm\OneDrive\Bureau\ESGI - Projets\4IABD\Projet Deep Learning\kaggle\working\26022023\rnn_loss_0.9452013969421387_acc_0.5972722172737122_best_epoch_2\assets


INFO:tensorflow:Assets written to: C:\Users\juanm\OneDrive\Bureau\ESGI - Projets\4IABD\Projet Deep Learning\kaggle\working\26022023\rnn_loss_0.9452013969421387_acc_0.5972722172737122_best_epoch_2\assets


In [None]:
# Tune, train and save the transformer using Bayesian optimisation.
hp_optimization_and_training(transformer, optimizer="BayesianOptimization")

**Evaluation**

In [375]:
def _saved_model_accuracy(model_dir: str) -> float:
    """Parse the validation accuracy embedded in a saved-model directory name.

    Expects a name of the form ``..._acc_<float>`` optionally followed by a
    ``_best_...`` or ``_para_...`` suffix. Only the last path component is
    inspected, so parent folder names cannot interfere.
    """
    accuracy_text = os.path.basename(model_dir).partition('_acc_')[2]
    for suffix_marker in ('_best_', '_para_'):
        accuracy_text = accuracy_text.partition(suffix_marker)[0]
    return float(accuracy_text)


# Collect every saved-model directory below "<KAGGLE_PATH>/working". Only names carrying
# the "_acc_" marker are kept, since those are the ones the parser above can handle.
models = [os.path.join(root, dir_name)
          for root, dir_names, _ in os.walk(os.path.join(KAGGLE_PATH, 'working'))
          for dir_name in dir_names
          if '_acc_' in dir_name]
# Best validation accuracy first.
sort_models_per_acc = sorted(models, key=_saved_model_accuracy, reverse=True)
print(sort_models_per_acc)

['C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\kaggle/working\\26022023\\rnn_loss_0.9452013969421387_acc_0.5972722172737122_best_epoch_2', 'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\kaggle/working\\26022023\\rnn_loss_0.9616591334342957_acc_0.5904111266136169_best_epoch_2', 'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\kaggle/working\\26022023\\rnn_loss_0.9633556604385376_acc_0.5894277691841125_best_epoch_2', 'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\kaggle/working\\25022023\\rnn_loss_0.9779065847396851_acc_0.5878055691719055_best_epoch_3', 'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\kaggle/working\\23022023\\mlp_loss_1.0539891719818115_acc_0.5459833145141602', 'C:\\Users\\juanm\\OneDrive\\Bureau\\ESGI - Projets\\4IABD\\Projet Deep Learning\\kaggle/working\\25022023\\transformer_loss_1.070438504219055

In [None]:
def models_acc(model_name):
    """Print the (up to) two most accurate saved runs of one architecture.

    Reads the module-level ``sort_models_per_acc`` list, which is sorted by accuracy in
    descending order, and matches on the saved directory's own name rather than the
    whole path. Each match is printed starting from ``model_name``.

    Args:
        model_name: Architecture name to look for (e.g. ``"rnn"``).
    """
    dir_names = [os.path.basename(res) for res in sort_models_per_acc]
    matches = [name[name.find(model_name):] for name in dir_names if model_name in name]
    if not matches:
        print(f"No {model_name} model found")
        return
    # Slicing tolerates a single match, where indexing the second entry would raise.
    for entry in matches[:2]:
        print(entry)

In [None]:
# Print the best saved runs of every architecture.
# NOTE(review): models_acc_and_loss is created here but nothing is ever appended to it
# (models_acc only prints), so it stays empty - confirm whether results were meant to be collected.
models_acc_and_loss = []
for model_name in ["linear", "mlp", "cnn", "resnet", "rnn", "transformer"]:
    models_acc(model_name)

In [None]:
# Display the list created in the previous cell (empty unless something appended to it).
models_acc_and_loss

In [376]:
# Load the saved model with the highest validation accuracy across all architectures
# (first entry of the descending sort). Raises IndexError if no saved model was found.
best_model = tf.keras.models.load_model(sort_models_per_acc[0])

In [377]:
# Predicted rating = index of the highest class score for each test review.
predictions = best_model.predict(test_dataset).argmax(axis=1)



**Submission**

In [378]:
# Two-column submission frame: one predicted rating per review id, in test-set order.
submission = pd.DataFrame({
    'review_id': list(review_ids),
    'rating': predictions,
})

In [379]:
# Write the submission without the DataFrame index, then report where it went.
submission.to_csv(SUBMISSION_PATH, index=False)
print(f"Submission registered at {SUBMISSION_PATH}")

Submission registered at C:\Users\juanm\OneDrive\Bureau\ESGI - Projets\4IABD\Projet Deep Learning\kaggle\working\26022023\223812_submission.csv


In [381]:
# Re-read the file just written and show the distribution of predicted ratings.
test = pd.read_csv(SUBMISSION_PATH)
test['rating'].value_counts()

4    187670
5    148699
3     83640
2     38818
1      9704
0      9502
Name: rating, dtype: int64

4    187670
5    148699
3     83640
2     38818
1      9704
0      9502
Name: rating, dtype: int64

In [382]:
# Compare against an earlier submission (run of 25/02/2023). The path is built from
# KAGGLE_PATH instead of a hard-coded absolute user directory, so the cell also works
# on other machines.
reference = pd.read_csv(os.path.join(KAGGLE_PATH, "working", "25022023", "145908_submission.csv"))
reference['rating'].value_counts()

4    192131
5    124705
3    107315
2     26796
0     15154
1     11932
Name: rating, dtype: int64