[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Pinkwjp/sentiment-analysis-with-transformer-on-IMDB)

In [None]:
# NOTE: Python 3.12 does not work well with Keras; use Python 3.10 instead:
# pipenv install --python 3.10
# pipenv shell

In [None]:
# Install the notebook's third-party dependencies into the kernel's environment.
# The order is deliberate: keras-nlp first, keras upgraded last
# (presumably so the final `--upgrade keras` decides the installed Keras version — confirm).
# NOTE(review): no versions are pinned, so a fresh run may resolve different
# releases than the ones that produced the recorded outputs.
%pip install -q --upgrade keras-nlp  # install keras-nlp before keras
%pip install keras-tuner
%pip install -q --upgrade keras

In [2]:
import keras
from keras import layers
import keras_nlp
from keras_nlp import layers as nlp_layers
import keras_tuner

In [3]:
# Create the folder that later cells save model checkpoints into.
from pathlib import Path

trained_model_folder = Path("./trained_models/")

# `exist_ok=True` makes the cell safe to re-run without a separate existence
# check; `parents=True` also creates missing parent folders should the path
# ever be nested deeper.
trained_model_folder.mkdir(parents=True, exist_ok=True)

# Stronger than `.exists()`: also fails if a regular file occupies the path.
assert trained_model_folder.is_dir()

In [4]:
# Settings for the built-in IMDB dataset (reviews already encoded as word indices).
vocab_size = 20000  # keep only the 20k most frequent words
maxlen = 200  # every review is padded/truncated to 200 tokens

# NOTE: `load_data` returns (train, test) splits; the second split is used as
# the validation data in this notebook.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(
    num_words=vocab_size
)
print(f"{len(x_train)} Training sequences")
print(f"{len(x_val)} Validation sequences")

# NOTE: with its default arguments `pad_sequences` pads and truncates at the
# front, so reviews longer than `maxlen` keep their LAST `maxlen` tokens.
x_train, x_val = (
    keras.utils.pad_sequences(sequences, maxlen=maxlen)
    for sequences in (x_train, x_val)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
25000 Training sequences
25000 Validation sequences


In [None]:
# baseline model


In [None]:
from keras_tuner import HyperParameters

def build_model(hp: HyperParameters):
    """Build and compile a Transformer-encoder sentiment classifier for tuning.

    Every tunable value is requested from `hp`, so the function can serve as
    the `hypermodel` of a KerasTuner tuner or be called directly with a fixed
    set of hyperparameters.

    Args:
        hp: KerasTuner hyperparameter container the values are drawn from.

    Returns:
        A compiled `keras.Model` that takes int64 token ids and outputs a
        single sigmoid probability; compiled with binary cross-entropy loss
        and the accuracy metric.
    """
    inputs = keras.Input(shape=(None,), dtype="int64")

    # The vocabulary size is not tuned; it comes from the module-level `vocab_size`.
    # NOTE(review): the arrays passed to the tuner in this notebook are padded
    # to `maxlen` tokens, which is shorter than every `sequence_length` value
    # searched here, so this hyperparameter may have little effect — confirm.
    sequence_length = hp.Int("sequence_length", min_value=300, max_value=600, step=100)
    embed_dimension = hp.Choice("embed_dimension", [32, 64])
    x = nlp_layers.TokenAndPositionEmbedding(
        vocabulary_size=vocab_size,
        sequence_length=sequence_length,
        embedding_dim=embed_dimension
        )(inputs)

    num_heads = hp.Choice("num_heads", [2, 3, 4])
    intermediate_dim = hp.Choice("intermediate_dim", [16, 32, 64])
    x = nlp_layers.TransformerEncoder(
        intermediate_dim=intermediate_dim,
        num_heads=num_heads
        )(x)

    # Dispatch table instead of `hp.conditional_scope` blocks: no
    # hyperparameter was registered inside those scopes, so they had no
    # effect, and an unmatched value would have skipped pooling silently.
    # The misspelled "GlobalMaxPolling" value is kept unchanged because it is
    # the name recorded in the search space and in saved trials.
    pooling_layers = {
        "GlobalMaxPolling": layers.GlobalMaxPool1D,
        "GlobalAveragePooling": layers.GlobalAveragePooling1D,
    }
    pooling_type = hp.Choice("pooling_type", list(pooling_layers))
    x = pooling_layers[pooling_type]()(x)

    # The same dropout rate is applied before and after the dense layer.
    dropout_rate = hp.Choice("dropout_rate", [0.1, 0.3, 0.5])
    x = layers.Dropout(dropout_rate)(x)

    dense_units = hp.Choice("dense_units", [10, 20, 30])
    x = layers.Dense(dense_units, activation="relu")(x)
    x = layers.Dropout(dropout_rate)(x)

    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    optimizer_type = hp.Choice("optimizer_type", ["adam", "rmsprop"])
    model.compile(
        optimizer=optimizer_type,
        loss="binary_crossentropy",
        metrics=["accuracy"])
    return model


# Smoke test: build the model once with a fresh `HyperParameters` object.
build_model(HyperParameters())

<Functional name=functional_1, built=True>

In [None]:
# Random search over the `build_model` search space, ranked by validation accuracy.
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    # Search budget: 10 trials, each executed twice.
    max_trials=10,
    executions_per_trial=2,
    # Results are written to tuned_models/tuned_transformer_imdb.
    directory="tuned_models",
    project_name="tuned_transformer_imdb",
    overwrite=True,
)

tuner.search_space_summary()

Search space summary
Default search space size: 8
sequence_length (Int)
{'default': None, 'conditions': [], 'min_value': 300, 'max_value': 600, 'step': 100, 'sampling': 'linear'}
embed_dimension (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64], 'ordered': True}
num_heads (Choice)
{'default': 2, 'conditions': [], 'values': [2, 3, 4], 'ordered': True}
intermediate_dim (Choice)
{'default': 16, 'conditions': [], 'values': [16, 32, 64], 'ordered': True}
pooling_type (Choice)
{'default': 'GlobalMaxPolling', 'conditions': [], 'values': ['GlobalMaxPolling', 'GlobalAveragePooling'], 'ordered': False}
dropout_rate (Choice)
{'default': 0.1, 'conditions': [], 'values': [0.1, 0.3, 0.5], 'ordered': True}
dense_units (Choice)
{'default': 10, 'conditions': [], 'values': [10, 20, 30], 'ordered': True}
optimizer_type (Choice)
{'default': 'adam', 'conditions': [], 'values': ['adam', 'rmsprop'], 'ordered': False}


In [None]:
# Short search: 2 epochs per execution, holding out 20% of the training data
# for the `val_accuracy` objective.
tuner.search(
    x_train,
    y_train,
    epochs=2,
    validation_split=0.2,
)

Trial 10 Complete [00h 00m 47s]
val_accuracy: 0.8709999918937683

Best val_accuracy So Far: 0.8863999843597412
Total elapsed time: 00h 08m 46s


In [None]:
# Build a fresh model from the best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)
best_tuned_model = build_model(best_hps[0])
best_tuned_model.summary()

In [None]:
# Checkpoint callback that only keeps the best model seen during training.
# NOTE(review): the checkpoint path starts with "/", i.e. it is absolute and
# does not point into the relative "tuned_models" folder used by the tuner —
# confirm this is intended.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="/tuned_models/best_tuned_model.keras",
        save_best_only=True,
    )
]

# Longer training run of the best configuration, with 20% held out for validation.
best_tuned_model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,
    callbacks=callbacks,
)

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 31ms/step - accuracy: 0.5747 - loss: 0.6533 - val_accuracy: 0.8752 - val_loss: 0.3050
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9025 - loss: 0.2769 - val_accuracy: 0.8892 - val_loss: 0.2760
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9488 - loss: 0.1656 - val_accuracy: 0.8826 - val_loss: 0.3074
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9704 - loss: 0.1036 - val_accuracy: 0.8672 - val_loss: 0.4804
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9834 - loss: 0.0676 - val_accuracy: 0.8708 - val_loss: 0.5229
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9917 - loss: 0.0398 - val_accuracy: 0.8696 - val_loss: 0.6335
Epoch 7/10
[1m313/313[0m

<keras.src.callbacks.history.History at 0x7cd2588588b0>

In [None]:
# Reload the saved checkpoint and evaluate it on the data that was not used for fitting.
final_model = keras.models.load_model(
    filepath="/tuned_models/best_tuned_model.keras"
)
final_model.evaluate(x=x_val, y=y_val)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8769 - loss: 0.3027


[0.3013143539428711, 0.8768399953842163]

**Download the data**

In [None]:
# Download the raw IMDB review archive and extract it, using the current
# folder as the cache directory.
keras.utils.get_file(
    origin="https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
    extract=True,
    cache_dir="./",
)

# Folder in which the following cells expect the extracted reviews.
imdb_dir = Path("./datasets/aclImdb")

Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
[1m84125825/84125825[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [None]:
# Show the directory layout of the extracted archive (directories only).
!tree -d datasets/aclImdb/


[01;34mdatasets/aclImdb/[0m
├── [01;34mtest[0m
│   ├── [01;34mneg[0m
│   └── [01;34mpos[0m
└── [01;34mtrain[0m
    ├── [01;34mneg[0m
    ├── [01;34mpos[0m
    └── [01;34munsup[0m

7 directories


Remove the unsupervised training data; we don't need it here.

In [None]:
# Delete the `unsup` folder of the training set.
# `-f` keeps the cell re-runnable: no error when the folder is already gone.
!rm -rf datasets/aclImdb/train/unsup


In [None]:
# Check the directory layout again after removing `train/unsup`.
!tree -d datasets/aclImdb/


[01;34mdatasets/aclImdb/[0m
├── [01;34mtest[0m
│   ├── [01;34mneg[0m
│   └── [01;34mpos[0m
└── [01;34mtrain[0m
    ├── [01;34mneg[0m
    └── [01;34mpos[0m

6 directories


quick look at one review

In [None]:
# Print the raw text of a single negative training review.
!cat datasets/aclImdb/train/neg/21_4.txt

What was with all the Turkish actors? No offense but I thought it was all for nothing for all these actors. The film had no script to test any actors acting skill or ability. It demanded next to nothing I bought this film to see Michael Madsen. He is one of my favorite actors but this film was another failure for him. The script was so bad. Their was just nothing to sink your teeth into and all the characters were two dimensional. Madsen tried to act like a hard ass but the script and direction didn't even allow him to do enough with his character to make it more interesting or 3 dimensional.<br /><br />Even the sound effects of the gunfight at the beginning of the film sounded like the noise of paint ball guns when they are fired in a skirmish. It was really weird and they didn't sound like real guns. A video game had better sound effects than this film. There was also a really annoying bloke at the beginning of the film who was a member of the robbery gang. He had this American whini

prepare validation set

In [None]:
import os, shutil, random


def _move_validation_split(train_dir, validation_dir, fraction=0.2, seed=1234):
    """Move a reproducible share of each class's training files to `validation_dir`.

    Args:
        train_dir: Folder containing the `neg` and `pos` class sub-folders.
        validation_dir: Destination folder; the class sub-folders are created.
        fraction: Share of the files of each class to move.
        seed: Seed of the shuffle that selects the files.
    """
    for category in ("neg", "pos"):
        (validation_dir / category).mkdir(parents=True, exist_ok=True)
        # `os.listdir` returns names in arbitrary order, so sort first;
        # otherwise the seeded shuffle is not reproducible across machines.
        files = sorted(os.listdir(train_dir / category))
        random.Random(seed).shuffle(files)
        num_validation_samples = int(fraction * len(files))
        # Slice from an explicit start index: `files[-0:]` would select
        # every file when the sample count rounds down to zero.
        validation_files = files[len(files) - num_validation_samples:]
        for file in validation_files:
            shutil.move(train_dir / category / file,
                        validation_dir / category / file)


validation_dir = imdb_dir / "validation"
train_dir = imdb_dir / "train"

# Only split once: re-running the cell would otherwise fail on the existing
# folder or move a further 20% of the remaining training files.
# NOTE: if an earlier run was interrupted midway, delete the `validation`
# folder contents manually (moving the files back) before re-running.
if not validation_dir.exists():
    _move_validation_split(train_dir, validation_dir)


In [None]:
# Check the directory layout once more; a `validation` folder should now exist.
!tree -d datasets/aclImdb/

[01;34mdatasets/aclImdb/[0m
├── [01;34mtest[0m
│   ├── [01;34mneg[0m
│   └── [01;34mpos[0m
├── [01;34mtrain[0m
│   ├── [01;34mneg[0m
│   └── [01;34mpos[0m
└── [01;34mvalidation[0m
    ├── [01;34mneg[0m
    └── [01;34mpos[0m

9 directories


In [None]:
batch_size = 32

# One batched text dataset per split, created in the order train, validation, test.
# Label 0 is negative, label 1 is positive.
train_dataset, validation_dataset, test_dataset = [
    keras.utils.text_dataset_from_directory(split_dir, batch_size=batch_size)
    for split_dir in (
        "datasets/aclImdb/train",
        "datasets/aclImdb/validation",
        "datasets/aclImdb/test",
    )
]

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [None]:
# Hand-picked settings for the basic Transformer-encoder classifier.
vocabulary_size = 20_000
sequence_length = 600
embed_dimension = 32

num_heads = 2
dense_layer_dimension = 32

# Variable-length sequences of integer token ids.
inputs = keras.Input(shape=(None,), dtype="int64")

# Token + position embedding followed by one Transformer encoder block.
x = nlp_layers.TokenAndPositionEmbedding(
    vocabulary_size=vocabulary_size,
    sequence_length=sequence_length,
    embedding_dim=embed_dimension,
)(inputs)
x = nlp_layers.TransformerEncoder(
    intermediate_dim=dense_layer_dimension,
    num_heads=num_heads,
)(x)

# Classification head: average pooling over the sequence, then a small dense
# layer with light dropout on both sides, ending in a single sigmoid unit.
# (Alternatives noted by the author: GlobalMaxPooling1D, dropout 0.5, rmsprop.)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

model_1 = keras.Model(inputs, outputs)
model_1.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model_1.summary()

In [None]:
# Peek at the first batch of the training dataset.
# Dedicated names are used so that the global `inputs` (the Keras input
# tensor of the model defined in this notebook) is not overwritten.
sample_texts, sample_labels = next(iter(train_dataset))
print("inputs.shape: ", sample_texts.shape)
print("inputs.dtype:", sample_texts.dtype)
print("targets.shape:", sample_labels.shape)
print("targets.dtype:", sample_labels.dtype)
print("inputs[0]:", sample_texts[0])
print("targets[0]:", sample_labels[0])


inputs.shape:  (32,)
inputs.dtype: <dtype: 'string'>
targets.shape: (32,)
targets.dtype: <dtype: 'int32'>
inputs[0]: tf.Tensor(b"One of Frances Farmer's earliest movies; at 22, she is absolutely beautiful. Bing Crosby is in great voice, but the songs are not his best. Martha Raye and Bob Burns are interesting, but their comedy, probably great in its time, is really corny today. Roy Rogers also appears- in a singing role. In my view only worth watching if you are a Frances Farmer fan, and possibly a Bing Crosby fan.", shape=(), dtype=string)
targets[0]: tf.Tensor(0, shape=(), dtype=int32)


train a text vectorization layer with unlabeled data

In [None]:
# The vectorization layer is adapted on text alone, so drop the labels.
text_only_train_dataset = train_dataset.map(lambda text, label: text)

max_length = 600
max_tokens = 20_000
text_vectorization = layers.TextVectorization(
    output_mode="int",
    max_tokens=max_tokens,
    output_sequence_length=max_length,
)

# Adapt the layer on the training text only.
text_vectorization.adapt(text_only_train_dataset)

prepare integer sequence datasets

In [None]:
# Apply the adapted vectorization layer to the text of every split; labels
# pass through unchanged.
int_train_dataset, int_validation_dataset, int_test_dataset = (
    dataset.map(
        lambda text, label: (text_vectorization(text), label),
        num_parallel_calls=4,
    )
    for dataset in (train_dataset, validation_dataset, test_dataset)
)

use a TransformerEncoder-based model for text classification

train the transformer encoder model

In [None]:
# Checkpoint callback that only keeps the best model seen during training.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="trained_models/basic_transformer_encoder.keras",
        save_best_only=True,
    )
]

model_1.fit(
    int_train_dataset,
    epochs=2,
    validation_data=int_validation_dataset,
    callbacks=callbacks,
)


Epoch 1/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 27ms/step - accuracy: 0.5881 - loss: 0.6518 - val_accuracy: 0.8768 - val_loss: 0.2981
Epoch 2/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step - accuracy: 0.8908 - loss: 0.2717 - val_accuracy: 0.8906 - val_loss: 0.2923


<keras.src.callbacks.history.History at 0x7c3494ccbf40>

evaluate the transformer encoder model

In [None]:
# Reload the saved checkpoint and report its accuracy on the test split.
model = keras.models.load_model(
    filepath="trained_models/basic_transformer_encoder.keras"
)

# `evaluate` returns [loss, accuracy]; index 1 is the accuracy.
print("Test accuracy: {:.3f}".format(model.evaluate(int_test_dataset)[1]))

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.8739 - loss: 0.3367
Test accuracy: 0.875


In [None]:
# Settings for the built-in IMDB dataset (reviews already encoded as word indices).
vocab_size = 20000  # keep only the 20k most frequent words
maxlen = 200  # every review is padded/truncated to 200 tokens

# NOTE: `load_data` returns (train, test) splits; the second split is used as
# the validation data in this notebook.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(
    num_words=vocab_size
)
print(f"{len(x_train)} Training sequences")
print(f"{len(x_val)} Validation sequences")

# NOTE: with its default arguments `pad_sequences` pads and truncates at the
# front, so reviews longer than `maxlen` keep their LAST `maxlen` tokens.
x_train, x_val = (
    keras.utils.pad_sequences(sequences, maxlen=maxlen)
    for sequences in (x_train, x_val)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
25000 Training sequences
25000 Validation sequences


tune a transformer encoder

In [None]:
from keras_tuner import HyperParameters

def build_model(hp: HyperParameters):
    """Build and compile a Transformer-encoder sentiment classifier for tuning.

    Every tunable value is requested from `hp`, so the function can serve as
    the `hypermodel` of a KerasTuner tuner or be called directly with a fixed
    set of hyperparameters.

    Args:
        hp: KerasTuner hyperparameter container the values are drawn from.

    Returns:
        A compiled `keras.Model` that takes int64 token ids and outputs a
        single sigmoid probability; compiled with binary cross-entropy loss
        and the accuracy metric.
    """
    inputs = keras.Input(shape=(None,), dtype="int64")

    # The vocabulary size is not tuned; it comes from the module-level `vocab_size`.
    # NOTE(review): the arrays passed to the tuner in this notebook are padded
    # to `maxlen` tokens, which is shorter than every `sequence_length` value
    # searched here, so this hyperparameter may have little effect — confirm.
    sequence_length = hp.Int("sequence_length", min_value=300, max_value=600, step=100)
    embed_dimension = hp.Choice("embed_dimension", [32, 64])
    x = nlp_layers.TokenAndPositionEmbedding(
        vocabulary_size=vocab_size,
        sequence_length=sequence_length,
        embedding_dim=embed_dimension
        )(inputs)

    num_heads = hp.Choice("num_heads", [2, 3, 4])
    intermediate_dim = hp.Choice("intermediate_dim", [16, 32, 64])
    x = nlp_layers.TransformerEncoder(
        intermediate_dim=intermediate_dim,
        num_heads=num_heads
        )(x)

    # Dispatch table instead of `hp.conditional_scope` blocks: no
    # hyperparameter was registered inside those scopes, so they had no
    # effect, and an unmatched value would have skipped pooling silently.
    # The misspelled "GlobalMaxPolling" value is kept unchanged because it is
    # the name recorded in the search space and in saved trials.
    pooling_layers = {
        "GlobalMaxPolling": layers.GlobalMaxPool1D,
        "GlobalAveragePooling": layers.GlobalAveragePooling1D,
    }
    pooling_type = hp.Choice("pooling_type", list(pooling_layers))
    x = pooling_layers[pooling_type]()(x)

    # The same dropout rate is applied before and after the dense layer.
    dropout_rate = hp.Choice("dropout_rate", [0.1, 0.3, 0.5])
    x = layers.Dropout(dropout_rate)(x)

    dense_units = hp.Choice("dense_units", [10, 20, 30])
    x = layers.Dense(dense_units, activation="relu")(x)
    x = layers.Dropout(dropout_rate)(x)

    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    optimizer_type = hp.Choice("optimizer_type", ["adam", "rmsprop"])
    model.compile(
        optimizer=optimizer_type,
        loss="binary_crossentropy",
        metrics=["accuracy"])
    return model


# Smoke test: build the model once with a fresh `HyperParameters` object.
build_model(HyperParameters())

<Functional name=functional_3, built=True>

In [None]:
# Random search over the `build_model` search space, ranked by validation accuracy.
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    # Search budget: 10 trials, each executed twice.
    max_trials=10,
    executions_per_trial=2,
    # Results are written to tuned_models/tuned_transformer_imdb.
    directory="tuned_models",
    project_name="tuned_transformer_imdb",
    overwrite=True,
)

tuner.search_space_summary()

Search space summary
Default search space size: 8
sequence_length (Int)
{'default': None, 'conditions': [], 'min_value': 300, 'max_value': 600, 'step': 100, 'sampling': 'linear'}
embed_dimension (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64], 'ordered': True}
num_heads (Choice)
{'default': 2, 'conditions': [], 'values': [2, 3, 4], 'ordered': True}
intermediate_dim (Choice)
{'default': 16, 'conditions': [], 'values': [16, 32, 64], 'ordered': True}
pooling_type (Choice)
{'default': 'GlobalMaxPolling', 'conditions': [], 'values': ['GlobalMaxPolling', 'GlobalAveragePooling'], 'ordered': False}
dropout_rate (Choice)
{'default': 0.1, 'conditions': [], 'values': [0.1, 0.3, 0.5], 'ordered': True}
dense_units (Choice)
{'default': 10, 'conditions': [], 'values': [10, 20, 30], 'ordered': True}
optimizer_type (Choice)
{'default': 'adam', 'conditions': [], 'values': ['adam', 'rmsprop'], 'ordered': False}


In [None]:
# Short search: 2 epochs per execution, holding out 20% of the training data
# for the `val_accuracy` objective.
tuner.search(
    x_train,
    y_train,
    epochs=2,
    validation_split=0.2,
)

Trial 10 Complete [00h 00m 35s]
val_accuracy: 0.8850000202655792

Best val_accuracy So Far: 0.8863000273704529
Total elapsed time: 00h 08m 56s


In [None]:
# Take the single top-ranked model of the search and show its architecture.
[best_model] = tuner.get_best_models(num_models=1)
best_model.summary()

  trackable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print the hyperparameters and score of the best trials of the search.
tuner.results_summary()

Results summary
Results in tuned_models/tuned_transformer_imdb
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 03 summary
Hyperparameters:
sequence_length: 400
embed_dimension: 64
num_heads: 2
intermediate_dim: 32
pooling_type: GlobalAveragePooling
dropout_rate: 0.3
dense_units: 10
optimizer_type: adam
Score: 0.8863000273704529

Trial 09 summary
Hyperparameters:
sequence_length: 400
embed_dimension: 32
num_heads: 2
intermediate_dim: 16
pooling_type: GlobalAveragePooling
dropout_rate: 0.3
dense_units: 30
optimizer_type: adam
Score: 0.8850000202655792

Trial 04 summary
Hyperparameters:
sequence_length: 400
embed_dimension: 32
num_heads: 4
intermediate_dim: 16
pooling_type: GlobalAveragePooling
dropout_rate: 0.3
dense_units: 30
optimizer_type: adam
Score: 0.8833000063896179

Trial 06 summary
Hyperparameters:
sequence_length: 500
embed_dimension: 32
num_heads: 2
intermediate_dim: 16
pooling_type: GlobalAveragePooling
dropout_rate: 0.1
dense_units: 20
optimizer

In [None]:
# Build a fresh model from the best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)
best_tuned_model = build_model(best_hps[0])
best_tuned_model.summary()

In [None]:
# TODO: train more (10 epochs) and add callback to save the best model
# For now: a single epoch on the full training set, without validation data.
best_tuned_model.fit(
    x=x_train,
    y=y_train,
    epochs=1,
)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 18ms/step - accuracy: 0.5699 - loss: 0.6468


<keras.src.callbacks.history.History at 0x7c3494876140>

In [None]:
# Evaluate the freshly trained model; the result is shown as [loss, accuracy].
best_tuned_model.evaluate(
    x=x_val,
    y=y_val,
)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8716 - loss: 0.3043


[0.3025885820388794, 0.8724799752235413]

## ***The following is for comparison***

Try to closely follow an example on the Keras website:  
https://keras.io/examples/nlp/text_classification_with_transformer/

In [None]:
# Settings for the built-in IMDB dataset (reviews already encoded as word indices).
vocab_size = 20000  # keep only the 20k most frequent words
maxlen = 200  # every review is padded/truncated to 200 tokens

# NOTE: `load_data` returns (train, test) splits; the second split is used as
# the validation data in this notebook.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(
    num_words=vocab_size
)
print(f"{len(x_train)} Training sequences")
print(f"{len(x_val)} Validation sequences")

# NOTE: with its default arguments `pad_sequences` pads and truncates at the
# front, so reviews longer than `maxlen` keep their LAST `maxlen` tokens.
x_train, x_val = (
    keras.utils.pad_sequences(sequences, maxlen=maxlen)
    for sequences in (x_train, x_val)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
25000 Training sequences
25000 Validation sequences


In [None]:
# Sizes used by the comparison model.
embed_dim = 32  # size of each token embedding
num_heads = 2  # attention heads in the encoder block
ff_dim = 32  # hidden size of the encoder's feed-forward network

# Fixed-length input: every sequence has exactly `maxlen` tokens.
inputs = layers.Input(shape=(maxlen,))

embedding_layer = nlp_layers.TokenAndPositionEmbedding(
    vocabulary_size=vocab_size,
    sequence_length=maxlen,
    embedding_dim=embed_dim,
)
x = embedding_layer(inputs)

encoder = nlp_layers.TransformerEncoder(
    intermediate_dim=ff_dim,
    num_heads=num_heads,
)
x = encoder(x)

# Classification head; unlike the other models in this notebook it ends in a
# two-unit softmax rather than a single sigmoid unit.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

In [None]:
# TODO: run the example model with my dataset to see whether the size of the training set makes a difference

In [None]:
# Integer class labels with a two-unit softmax output, hence the sparse
# categorical cross-entropy loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Checkpoint callback that only keeps the best model seen during training.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="trained_models/new_transformer_encoder.keras",
        save_best_only=True,
    )
]

history = model.fit(
    x_train,
    y_train,
    epochs=2,
    batch_size=32,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
)


Epoch 1/2
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 24ms/step - accuracy: 0.7158 - loss: 0.5174 - val_accuracy: 0.8810 - val_loss: 0.2805
Epoch 2/2
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 8ms/step - accuracy: 0.9245 - loss: 0.1998 - val_accuracy: 0.8712 - val_loss: 0.3056
