[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Pinkwjp/sentiment-analysis-with-transformer-on-IMDB/blob/master/transformer.ipynb)

In [None]:
# NOTE: Python 3.12 does not work well with Keras; use a Python 3.10 environment instead:
# pipenv install --python 3.10
# pipenv shell

In [None]:
# Runtime setup: install the three libraries imported below.
# The order is deliberate: keras-nlp is upgraded first and keras last.
# NOTE(review): presumably so the final upgrade decides which Keras version ends up active -- confirm.
%pip install -q --upgrade keras-nlp  # install keras-nlp before keras
%pip install keras-tuner
%pip install -q --upgrade keras

In [4]:
import keras
from keras import layers
import keras_nlp
from keras_nlp import layers as nlp_layers
import keras_tuner

In [5]:
from pathlib import Path

# Folder that receives every saved checkpoint and the tuner's working files.
trained_model_folder = Path("./trained_models/")

# Create the folder in a single idempotent call instead of check-then-create:
# `exist_ok=True` makes re-running the cell safe, `parents=True` tolerates a
# missing parent. mkdir still raises FileExistsError when the path exists but
# is not a directory, so no separate existence assertion is needed afterwards.
trained_model_folder.mkdir(parents=True, exist_ok=True)

In [6]:
# Dataset settings shared with the model definitions further down.
vocab_size = 20000  # handed to the IMDB loader as `num_words`
maxlen = 200        # target sequence length handed to `pad_sequences`

# Load both splits, then unpack features and labels from each.
train_split, test_split = keras.datasets.imdb.load_data(num_words=vocab_size)
x_train, y_train = train_split
x_test, y_test = test_split

print("training set size:", len(x_train))
print("test set size:", len(x_test))

# Bring every review to the same length so the splits can be fed to the models.
x_train = keras.utils.pad_sequences(x_train, maxlen=maxlen)
x_test = keras.utils.pad_sequences(x_test, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
training set size: 25000
test set size: 25000


In [7]:
# Baseline classifier:
# token+position embedding -> one transformer encoder -> global max pool -> dropout -> sigmoid.
inputs = keras.Input(shape=(None,), dtype="int64")

embedded = nlp_layers.TokenAndPositionEmbedding(
    vocabulary_size=vocab_size,
    sequence_length=maxlen,
    embedding_dim=32,
)(inputs)

encoded = nlp_layers.TransformerEncoder(intermediate_dim=20, num_heads=2)(embedded)

pooled = layers.GlobalMaxPooling1D()(encoded)
regularized = layers.Dropout(0.3)(pooled)
outputs = layers.Dense(1, activation="sigmoid")(regularized)

baseline_model = keras.Model(inputs, outputs)
baseline_model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
baseline_model.summary()

In [8]:
from keras import callbacks

def save_best_only(file_path: Path) -> list[callbacks.Callback]:
    """Return a one-element callback list that checkpoints to ``file_path``.

    The list holds a single ``ModelCheckpoint`` created with
    ``save_best_only=True``; all its other options are left at their defaults.
    """
    checkpoint = callbacks.ModelCheckpoint(file_path, save_best_only=True)
    return [checkpoint]

In [9]:
# Checkpoint file for the baseline run, stored inside the trained-models folder.
baseline_model_path = trained_model_folder.joinpath("baseline_model.keras")

# Train the baseline; the checkpoint callback writes to the path above.
baseline_model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
    callbacks=save_best_only(baseline_model_path),
)


Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 59ms/step - accuracy: 0.6151 - loss: 0.7688 - val_accuracy: 0.8216 - val_loss: 0.4064
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - accuracy: 0.8193 - loss: 0.4066 - val_accuracy: 0.8284 - val_loss: 0.3804
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.8571 - loss: 0.3356 - val_accuracy: 0.8412 - val_loss: 0.3647
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.8823 - loss: 0.2816 - val_accuracy: 0.8644 - val_loss: 0.3205
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.8968 - loss: 0.2530 - val_accuracy: 0.8558 - val_loss: 0.3492
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.9132 - loss: 0.2164 - val_accuracy: 0.8714 - val_loss: 0.3166
Epoch 7/10
[1m313/3

<keras.src.callbacks.history.History at 0x7bc2675737f0>

In [10]:
# Reload the checkpoint written during training and score it on the test split.
best_baseline_model = keras.models.load_model(baseline_model_path)
best_baseline_model.evaluate(x=x_test, y=y_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8651 - loss: 0.3176


[0.3180903494358063, 0.8640000224113464]

In [11]:
from keras_tuner import HyperParameters

def build_model(hp: HyperParameters) -> keras.Model:
    """Assemble and compile one candidate classifier for the tuner.

    Hyperparameters requested from ``hp``:
        embed_dimension: embedding width, one of 16, 32 or 64.
        intermediate_dim: ``intermediate_dim`` of the transformer encoder,
            one of 8, 16 or 32.
        num_heads: ``num_heads`` of the transformer encoder, one of 2, 3 or 4.
        use_dense_layers: when true, a Dense(10) + Dropout(0.3) pair is
            inserted before the output layer.

    Returns:
        A ``keras.Model`` compiled with the "rmsprop" optimizer,
        "binary_crossentropy" loss and an "accuracy" metric.
    """
    # Request the hyperparameters up front, in the order the network uses them.
    embed_dim = hp.Choice("embed_dimension", [16, 32, 64])
    feedforward_dim = hp.Choice("intermediate_dim", [8, 16, 32])
    head_count = hp.Choice("num_heads", [2, 3, 4])
    wants_dense_head = hp.Boolean("use_dense_layers")

    inputs = keras.Input(shape=(None,), dtype="int64")

    features = nlp_layers.TokenAndPositionEmbedding(
        vocabulary_size=vocab_size,
        sequence_length=maxlen,
        embedding_dim=embed_dim,
    )(inputs)
    features = nlp_layers.TransformerEncoder(
        intermediate_dim=feedforward_dim,
        num_heads=head_count,
    )(features)

    # Same pooling layer as the baseline model (GlobalMaxPool1D is its alias).
    features = layers.GlobalMaxPooling1D()(features)
    features = layers.Dropout(0.3)(features)

    if wants_dense_head:
        # NOTE(review): Dense(10) is created without an activation, i.e. a
        # purely linear projection -- confirm this is intended.
        features = layers.Dense(10)(features)
        features = layers.Dropout(0.3)(features)

    outputs = layers.Dense(1, activation="sigmoid")(features)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer="rmsprop",
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model


In [18]:
# Random search over the hyperparameters declared in build_model, with
# validation accuracy as the objective. Tuner files go under the
# trained-models folder, in the "tuning" project.
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=50,
    executions_per_trial=1,
    overwrite=True,
    directory=trained_model_folder,
    project_name="tuning",
)

tuner.search_space_summary()

Search space summary
Default search space size: 4
embed_dimension (Choice)
{'default': 16, 'conditions': [], 'values': [16, 32, 64], 'ordered': True}
intermediate_dim (Choice)
{'default': 8, 'conditions': [], 'values': [8, 16, 32], 'ordered': True}
num_heads (Choice)
{'default': 2, 'conditions': [], 'values': [2, 3, 4], 'ordered': True}
use_dense_layers (Boolean)
{'default': False, 'conditions': []}


In [19]:
# Run the search on the training split with a short 2-epoch budget and a 0.2 validation split.
tuner.search(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=2,
)


Trial 50 Complete [00h 00m 18s]
val_accuracy: 0.853600025177002

Best val_accuracy So Far: 0.870199978351593
Total elapsed time: 00h 19m 25s


In [20]:
# Ask the tuner for its single best hyperparameter set and display the chosen values.
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hp = top_hyperparameters[0]
best_hp.values


{'embed_dimension': 16,
 'intermediate_dim': 32,
 'num_heads': 2,
 'use_dense_layers': True}

In [21]:
# Fetch the single best model found by the search and print its architecture.
# NOTE(review): this appears to return the model with the weights restored from
# its search trial (see the load_own_variables message in the cell output), so
# the later fit call continues training instead of starting fresh -- confirm intended.
top_models = tuner.get_best_models(num_models=1)
best_tuned_model = top_models[0]
best_tuned_model.summary()

  trackable.load_own_variables(weights_store.get(inner_path))


In [22]:
# Checkpoint file for the tuned model, stored inside the trained-models folder.
best_tuned_model_path = trained_model_folder.joinpath("best_tuned_model.keras")

# Train the tuned model with the same batch size, epochs and validation split as the baseline.
best_tuned_model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    validation_split=0.2,
    epochs=10,
    callbacks=save_best_only(best_tuned_model_path),
)

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 34ms/step - accuracy: 0.8521 - loss: 0.3612 - val_accuracy: 0.7902 - val_loss: 0.4322
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.8888 - loss: 0.2864 - val_accuracy: 0.8770 - val_loss: 0.2966
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.9160 - loss: 0.2234 - val_accuracy: 0.8700 - val_loss: 0.3167
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.9401 - loss: 0.1731 - val_accuracy: 0.8844 - val_loss: 0.2975
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.9562 - loss: 0.1353 - val_accuracy: 0.8782 - val_loss: 0.3414
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9643 - loss: 0.1148 - val_accuracy: 0.8838 - val_loss: 0.3482
Epoch 7/10
[1m313/31

<keras.src.callbacks.history.History at 0x7bc260d58e80>

In [23]:
# Replace the in-memory model with the checkpoint saved during training,
# then score it on the test split.
best_tuned_model = keras.models.load_model(best_tuned_model_path)
best_tuned_model.evaluate(x=x_test, y=y_test)


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8793 - loss: 0.2978


[0.29807788133621216, 0.8796399831771851]