[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Pinkwjp/sentiment-analysis-with-transformer-on-IMDB/blob/master/transformer.ipynb)

In [1]:
# NOTE: Python 3.12 does not work well with Keras, so set up a Python 3.10 environment first:
#   pipenv install --python 3.10
#   pipenv shell

In [None]:
%pip install -q --upgrade keras-nlp  # install keras-nlp before keras
%pip install keras-tuner
%pip install -q --upgrade keras

In [3]:
import keras
from keras import layers
import keras_nlp
from keras_nlp import layers as nlp_layers
import keras_tuner

In [4]:
from pathlib import Path

# Folder used for saved model checkpoints and the tuner's working directory.
trained_model_folder = Path("./trained_models/")

# Create the folder in a single call instead of "check, then create":
# `exist_ok=True` keeps the cell safe to re-run, `parents=True` keeps it working
# if the path is ever changed to a nested one, and mkdir raises right here if a
# regular file is already sitting at this path.
trained_model_folder.mkdir(parents=True, exist_ok=True)

assert trained_model_folder.is_dir()

In [5]:
# Settings shared by every model in this notebook.
vocab_size = 20000  # passed as `num_words` to the IMDB loader and as the embedding vocabulary size
maxlen = 200        # target sequence length for padding and for the position embedding
random_seed = 42

# Seed Python, NumPy and the Keras backend so that weight initialisation,
# dropout and batch shuffling repeat across "Restart & Run All" (as far as the
# backend/hardware allows).
keras.utils.set_random_seed(random_seed)

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=vocab_size)
print("training set size:", len(x_train))
print("test set size:", len(x_test))

# Bring every review to exactly `maxlen` token ids.
x_train = keras.utils.pad_sequences(x_train, maxlen=maxlen)
x_test = keras.utils.pad_sequences(x_test, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
training set size: 25000
test set size: 25000


In [6]:
# Baseline classifier: token + position embedding -> one Transformer encoder
# block -> global max pooling -> dropout -> single sigmoid output unit.
inputs = keras.Input(shape=(None,), dtype="int64")

# Layers applied in order before the output unit.
baseline_hidden_layers = [
    nlp_layers.TokenAndPositionEmbedding(
        vocabulary_size=vocab_size,
        sequence_length=maxlen,
        embedding_dim=32,
    ),
    nlp_layers.TransformerEncoder(intermediate_dim=20, num_heads=2),
    layers.GlobalMaxPooling1D(),
    layers.Dropout(0.3),
]

x = inputs
for hidden_layer in baseline_hidden_layers:
    x = hidden_layer(x)

outputs = layers.Dense(1, activation="sigmoid")(x)

baseline_model = keras.Model(inputs=inputs, outputs=outputs)
baseline_model.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)
baseline_model.summary()

In [7]:
from keras import callbacks

def save_best_only(file_path: Path) -> list[callbacks.Callback]:
    """Return a callback list that checkpoints a model to `file_path`.

    The list holds a single `ModelCheckpoint` created with
    `save_best_only=True`; it is returned as a list so it can be passed
    straight to `fit(callbacks=...)`.
    """
    checkpoint = callbacks.ModelCheckpoint(filepath=file_path, save_best_only=True)
    return [checkpoint]

In [8]:
# Checkpoint file written by the `save_best_only` callback during training.
baseline_model_path = trained_model_folder / "baseline_model.keras"

baseline_model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=64,
    callbacks=save_best_only(baseline_model_path),
)


Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 60ms/step - accuracy: 0.6041 - loss: 0.7141 - val_accuracy: 0.8100 - val_loss: 0.4303
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.8153 - loss: 0.4130 - val_accuracy: 0.8418 - val_loss: 0.3591
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.8544 - loss: 0.3383 - val_accuracy: 0.8508 - val_loss: 0.3465
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.8769 - loss: 0.2973 - val_accuracy: 0.8576 - val_loss: 0.3289
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.8915 - loss: 0.2637 - val_accuracy: 0.8690 - val_loss: 0.3101
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.9138 - loss: 0.2247 - val_accuracy: 0.8498 - val_loss: 0.3561
Epoch 7/10
[1m313/3

<keras.src.callbacks.history.History at 0x79caa688ac80>

In [9]:
# Reload the checkpoint stored at `baseline_model_path` and score it on the test set.
best_baseline_model = keras.models.load_model(baseline_model_path)
best_baseline_model.evaluate(x=x_test, y=y_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8702 - loss: 0.3160


[0.31575116515159607, 0.8715199828147888]

In [10]:
from keras_tuner import HyperParameters

def build_model(hp: HyperParameters) -> keras.Model:
    """Build and compile one candidate classifier for the tuner.

    Architecture: token + position embedding -> one Transformer encoder block
    -> global max pooling -> dropout -> optional hidden dense block -> single
    sigmoid output unit. Reads the module-level `vocab_size` and `maxlen`.

    Hyperparameters registered on `hp`:
        embed_dimension:  embedding width, one of 16, 32, 64.
        intermediate_dim: encoder feed-forward width, one of 8, 16, 32.
        num_heads:        number of attention heads, one of 2, 3, 4.
        use_dense_layers: whether to insert a Dense(10) + Dropout block
                          before the output unit.

    Args:
        hp: the hyperparameter container supplied by keras-tuner.

    Returns:
        A model compiled with the rmsprop optimizer, binary cross-entropy
        loss and the accuracy metric.
    """
    inputs = keras.Input(shape=(None,), dtype="int64")

    x = nlp_layers.TokenAndPositionEmbedding(
        vocabulary_size=vocab_size,
        sequence_length=maxlen,
        embedding_dim=hp.Choice("embed_dimension", [16, 32, 64]),
    )(inputs)

    x = nlp_layers.TransformerEncoder(
        intermediate_dim=hp.Choice("intermediate_dim", [8, 16, 32]),
        num_heads=hp.Choice("num_heads", [2, 3, 4]),
    )(x)

    # Same pooling layer (and spelling) as the baseline model.
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dropout(0.3)(x)

    if hp.Boolean("use_dense_layers"):
        # The hidden layer needs a non-linearity: with Dense's default
        # (no activation) it would be a purely linear map feeding the final
        # Dense layer and would add no non-linear capacity to the head.
        x = layers.Dense(10, activation="relu")(x)
        x = layers.Dropout(0.3)(x)

    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)
    model.compile(optimizer="rmsprop",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model


In [11]:
# Random search over the space declared in `build_model`, ranked by validation
# accuracy. Tuner files go under `trained_model_folder` in the "tuning" project,
# and `overwrite=True` is passed so the tuner is created fresh on each run.
search_settings = dict(
    objective="val_accuracy",
    max_trials=60,
    executions_per_trial=1,
    overwrite=True,
    directory=trained_model_folder,
    project_name="tuning",
)
tuner = keras_tuner.RandomSearch(hypermodel=build_model, **search_settings)

tuner.search_space_summary()

Search space summary
Default search space size: 4
embed_dimension (Choice)
{'default': 16, 'conditions': [], 'values': [16, 32, 64], 'ordered': True}
intermediate_dim (Choice)
{'default': 8, 'conditions': [], 'values': [8, 16, 32], 'ordered': True}
num_heads (Choice)
{'default': 2, 'conditions': [], 'values': [2, 3, 4], 'ordered': True}
use_dense_layers (Boolean)
{'default': False, 'conditions': []}


In [12]:
# Each trial trains for two epochs, with validation_split=0.2 supplying the validation data.
tuner.search(x=x_train, y=y_train, epochs=2, validation_split=0.2)


Trial 47 Complete [00h 00m 29s]
val_accuracy: 0.8464000225067139

Best val_accuracy So Far: 0.8718000054359436
Total elapsed time: 00h 21m 27s


In [13]:
# Hyperparameter values of the top-ranked trial.
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hp = top_hyperparameters[0]
best_hp.values


{'embed_dimension': 16,
 'intermediate_dim': 8,
 'num_heads': 3,
 'use_dense_layers': False}

In [14]:
# Model rebuilt by the tuner for its top-ranked trial.
top_models = tuner.get_best_models(num_models=1)
best_tuned_model = top_models[0]
best_tuned_model.summary()

  trackable.load_own_variables(weights_store.get(inner_path))


In [15]:
# Checkpoint file written by the `save_best_only` callback during training.
best_tuned_model_path = trained_model_folder / "best_tuned_model.keras"

# Continue training the tuner's best model with the same fit settings as the baseline.
best_tuned_model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=64,
    callbacks=save_best_only(best_tuned_model_path),
)


Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 34ms/step - accuracy: 0.8620 - loss: 0.3319 - val_accuracy: 0.8324 - val_loss: 0.3682
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.8865 - loss: 0.2750 - val_accuracy: 0.8806 - val_loss: 0.2930
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9137 - loss: 0.2286 - val_accuracy: 0.8818 - val_loss: 0.2935
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9337 - loss: 0.1766 - val_accuracy: 0.8856 - val_loss: 0.3005
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9484 - loss: 0.1485 - val_accuracy: 0.8790 - val_loss: 0.3194
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9590 - loss: 0.1225 - val_accuracy: 0.8816 - val_loss: 0.3427
Epoch 7/10
[1m313/31

<keras.src.callbacks.history.History at 0x79caa4710940>

In [16]:
# Reload the checkpoint stored at `best_tuned_model_path` and score it on the test set.
best_tuned_model = keras.models.load_model(best_tuned_model_path)
best_tuned_model.evaluate(x=x_test, y=y_test)


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8766 - loss: 0.2968


[0.2989835739135742, 0.8754000067710876]