In [1]:
import os

import tensorflow as tf
import tensorflow_datasets as tfds
import keras_hub

## Daten aufbereiten

In [34]:
# Load both IMDB review splits; the 'test' split is bound to `raw_val_ds`.
raw_train_ds, raw_val_ds = tfds.load(
    name='imdb_reviews',
    split=['train', 'test'],
)

In [35]:
# Print a single raw record to see how the features are laid out.
for raw_example in raw_train_ds.take(1):
    print(raw_example)

{'label': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'text': <tf.Tensor: shape=(), dtype=string, numpy=b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.">}


2025-04-25 12:58:46.890265: W tensorflow/core/kernels/data/cache_dataset_ops.cc:914] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [50]:
def _split_features(example):
    """Turn one feature dict into a ``(text, label)`` pair."""
    return example['text'], example['label']


train_ds = raw_train_ds.map(_split_features)

# Print one mapped pair to confirm the tuple layout.
for review, sentiment in train_ds.take(1):
    print(review)
    print(sentiment)

tf.Tensor(b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.", shape=(), dtype=string)
tf.Tensor(0, shape=(), dtype=int64)


In [51]:
def _to_text_label(example):
    """Unpack one feature dict into a ``(text, label)`` pair."""
    return example['text'], example['label']


# Build the training pipeline from the raw split instead of re-wrapping
# `train_ds`, so re-running this cell never batches already-batched data.
train_ds = (
    raw_train_ds
    .map(_to_text_label)
    .shuffle(1000)
    .batch(128)
    .prefetch(tf.data.AUTOTUNE)
)

# Pin the shuffle order: `shuffle` reshuffles on every pass by default, so
# `take(2500)` would otherwise select a different validation subset each epoch
# and `val_loss` would not be comparable between epochs.
val_ds = (
    raw_val_ds
    .shuffle(1000, seed=42, reshuffle_each_iteration=False)
    .take(2500)
    .map(_to_text_label)
    .batch(128)
    .prefetch(tf.data.AUTOTUNE)
)

## Erster Versuch: eingefrorenes BERT-Backbone mit trainierbarem Dense-Kopf

In [40]:
# Preset name reused by the model-loading cells of this notebook.
bert_name = "bert_tiny_en_uncased"
tokenizer = keras_hub.models.BertTokenizer.from_preset(bert_name)

# Tokenize a short sentence and print the resulting ids.
sample_text = "This is a test"
sample_ids = tokenizer(sample_text)
print(sample_ids)

tf.Tensor([2023 2003 1037 3231], shape=(4,), dtype=int32)


In [41]:
# Load the preprocessing layer and the encoder that belong to the same preset.
hub_models = keras_hub.models
preprocessor = hub_models.BertPreprocessor.from_preset(bert_name)
backbone = hub_models.BertBackbone.from_preset(bert_name)

In [42]:
# Push two toy strings through preprocessing and the encoder, then print the
# available output keys and the shape of each output.
sample_inputs = preprocessor(["hallo", "this is a test"])
sample_outputs = backbone(sample_inputs)
print(sample_outputs.keys())
for output_name in ('pooled_output', 'sequence_output'):
    print(sample_outputs[output_name].shape)

dict_keys(['sequence_output', 'pooled_output'])
(2, 128)
(2, 512, 128)


In [60]:
# Freeze the pretrained encoder so that only the dense head below is trained.
# NOTE(review): freezing alone presumably does not disable the encoder's
# dropout during `fit`; consider calling it with `training=False` — confirm.
backbone.trainable = False

# Named `text_input` rather than `input` so the builtin `input()` stays usable
# for the rest of the kernel session.
text_input = tf.keras.Input(shape=(), dtype=tf.string, name='text')
output = backbone(preprocessor(text_input))

# Dense head stacked on the backbone's `pooled_output`, ending in a single
# sigmoid unit.
net = output['pooled_output']
net = tf.keras.layers.Dense(512, activation='selu')(net)
net = tf.keras.layers.Dense(512, activation='relu')(net)
net = tf.keras.layers.Dense(128, activation='selu')(net)
net = tf.keras.layers.Dense(1, activation='sigmoid')(net)

model = tf.keras.Model(text_input, net)
model.summary()

In [61]:
# Configure training for `model`: Adam optimizer, binary cross-entropy loss,
# accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [62]:
# Stop once `val_loss` has not improved by at least 0.003 for two epochs in a
# row, and restore the best weights seen.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.003,
    patience=2,
    restore_best_weights=True,
)
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[early_stopping_cb],
    verbose=2,
)

Epoch 1/20
196/196 - 21s - 107ms/step - accuracy: 0.6090 - loss: 0.7174 - val_accuracy: 0.6932 - val_loss: 0.5991
Epoch 2/20
196/196 - 17s - 88ms/step - accuracy: 0.6575 - loss: 0.6200 - val_accuracy: 0.7112 - val_loss: 0.5792
Epoch 3/20
196/196 - 18s - 90ms/step - accuracy: 0.6660 - loss: 0.6104 - val_accuracy: 0.6516 - val_loss: 0.6151
Epoch 4/20
196/196 - 17s - 87ms/step - accuracy: 0.6570 - loss: 0.6220 - val_accuracy: 0.7016 - val_loss: 0.5730
Epoch 5/20
196/196 - 17s - 86ms/step - accuracy: 0.6746 - loss: 0.6062 - val_accuracy: 0.6944 - val_loss: 0.5887
Epoch 6/20
196/196 - 17s - 87ms/step - accuracy: 0.6748 - loss: 0.6046 - val_accuracy: 0.7180 - val_loss: 0.5692
Epoch 7/20
196/196 - 17s - 88ms/step - accuracy: 0.6784 - loss: 0.5978 - val_accuracy: 0.7104 - val_loss: 0.5717
Epoch 8/20
196/196 - 17s - 88ms/step - accuracy: 0.6789 - loss: 0.6015 - val_accuracy: 0.6520 - val_loss: 0.6059


<keras.src.callbacks.history.History at 0x4f8cc27b0>

In [74]:
# Create the target directory first so that saving does not depend on
# `models/` already existing (e.g. on a fresh checkout).
frozen_model_path = "models/bert_tiny_en_uncased_finetuned.keras"
os.makedirs(os.path.dirname(frozen_model_path), exist_ok=True)
model.save(frozen_model_path)

## Zweiter Versuch: vollständiges Fine-Tuning mit Softmax-Ausgabe

In [64]:
# Load a separate preprocessor/backbone pair for this model; `trainable` is
# not changed here, so the backbone is used as loaded.
preprocessor2 = keras_hub.models.BertPreprocessor.from_preset(bert_name)
backbone2 = keras_hub.models.BertBackbone.from_preset(bert_name)

input2 = tf.keras.Input(shape=(), dtype=tf.string, name='text')
output2 = backbone2(preprocessor2(input2))

# Two-unit softmax head placed directly on the backbone's `pooled_output`.
net2 = tf.keras.layers.Dense(2, activation='softmax')(output2['pooled_output'])

model2 = tf.keras.Model(input2, net2)
model2.summary()

In [65]:
# The head already applies softmax, so the loss is told it receives
# probabilities rather than logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model2.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

In [66]:
# Early stopping on `val_loss`: an improvement must be at least 0.003, two
# epochs without one end training, and the best weights are restored.
stopping_options = dict(
    monitor='val_loss',
    min_delta=0.003,
    patience=2,
    restore_best_weights=True,
)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(**stopping_options)
model2.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[early_stopping_cb],
    verbose=2,
)

Epoch 1/20
196/196 - 49s - 248ms/step - accuracy: 0.5800 - loss: 0.6595 - val_accuracy: 0.7420 - val_loss: 0.5689
Epoch 2/20
196/196 - 40s - 204ms/step - accuracy: 0.7996 - loss: 0.4418 - val_accuracy: 0.8080 - val_loss: 0.4107
Epoch 3/20
196/196 - 41s - 211ms/step - accuracy: 0.8954 - loss: 0.2556 - val_accuracy: 0.8388 - val_loss: 0.3965
Epoch 4/20
196/196 - 44s - 224ms/step - accuracy: 0.9438 - loss: 0.1480 - val_accuracy: 0.8408 - val_loss: 0.5755
Epoch 5/20
196/196 - 40s - 206ms/step - accuracy: 0.9760 - loss: 0.0724 - val_accuracy: 0.8340 - val_loss: 0.7314


<keras.src.callbacks.history.History at 0x4f8c1dd30>

In [73]:
# Create the target directory first so that saving does not depend on
# `models/` already existing (e.g. on a fresh checkout).
finetuned_model_path = "models/bert_tiny_en_uncased_full_finetuned_2.keras"
os.makedirs(os.path.dirname(finetuned_model_path), exist_ok=True)
model2.save(finetuned_model_path)

## Dritter Versuch: vorgefertigter `TextClassifier` aus KerasHub

In [75]:
# Build the ready-made classifier for the preset with a two-class head.
# NOTE(review): no `sequence_lengths` keyword is passed here because it is not
# a documented `from_preset` argument; the input length is a preprocessor
# setting (`sequence_length`). To change it, build the preprocessor explicitly
# and pass it via `preprocessor=` — TODO confirm the maximum length the preset
# supports first.
classifier = keras_hub.models.TextClassifier.from_preset(bert_name, num_classes=2)
classifier.summary()

In [77]:
# Make sure the output directory exists before the long training run, so a
# missing `models/` folder cannot cause the save to fail afterwards.
classifier_path = "models/classifier_bert_tiny_en_uncased_1024_2_classifier.keras"
os.makedirs(os.path.dirname(classifier_path), exist_ok=True)

# Stop once `val_loss` has not improved by at least 0.003 for two epochs in a
# row, and restore the best weights seen.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.003,
    patience=2,
    restore_best_weights=True,
)
classifier.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[early_stopping_cb],
    verbose=2,
)
classifier.save(classifier_path)

Epoch 1/20
196/196 - 46s - 236ms/step - loss: 0.5607 - sparse_categorical_accuracy: 0.7210 - val_loss: 0.3867 - val_sparse_categorical_accuracy: 0.8376
Epoch 2/20
196/196 - 41s - 208ms/step - loss: 0.3399 - sparse_categorical_accuracy: 0.8600 - val_loss: 0.3277 - val_sparse_categorical_accuracy: 0.8628
Epoch 3/20
196/196 - 41s - 211ms/step - loss: 0.2821 - sparse_categorical_accuracy: 0.8873 - val_loss: 0.3121 - val_sparse_categorical_accuracy: 0.8700
Epoch 4/20
196/196 - 43s - 220ms/step - loss: 0.2413 - sparse_categorical_accuracy: 0.9062 - val_loss: 0.3052 - val_sparse_categorical_accuracy: 0.8808
Epoch 5/20
196/196 - 42s - 212ms/step - loss: 0.2000 - sparse_categorical_accuracy: 0.9266 - val_loss: 0.3303 - val_sparse_categorical_accuracy: 0.8800
Epoch 6/20
196/196 - 41s - 207ms/step - loss: 0.1742 - sparse_categorical_accuracy: 0.9376 - val_loss: 0.3209 - val_sparse_categorical_accuracy: 0.8844
