In [5]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting transformers
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
Collecting filelock (from transformers)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.26.0 (from transformers)
  Downloading huggingface_hub-0.29.3-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading transformers-4.49.0-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Pull the IMDB training split and materialise it as Python values in one pass
dataset = tfds.load("imdb_reviews", split="train", as_supervised=True)
examples = [
    (review.numpy().decode('utf-8'), int(sentiment.numpy()))
    for review, sentiment in dataset
]
texts = [pair[0] for pair in examples]
labels = [pair[1] for pair in examples]

# Hold out 20% of the reviews for validation (fixed random_state for a repeatable split)
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)
train_data = tf.data.Dataset.from_tensor_slices((train_texts, train_labels))
val_data = tf.data.Dataset.from_tensor_slices((val_texts, val_labels))

# Preprocessing 
def preprocess_text(text, label):
    """Lower-case a review and replace HTML `<br>` tags with a single space.

    Args:
        text: string tensor holding the review text.
        label: value paired with the text; returned unchanged.

    Returns:
        A `(cleaned_text, label)` tuple, so the function can be passed straight
        to `Dataset.map` on (text, label) pairs.
    """
    cleaned = tf.strings.regex_replace(
        tf.strings.lower(text),
        r"<br\s*/?>",
        " ",
    )
    return cleaned, label
BUFFER_SIZE = 10000
BATCH_SIZE = 64

# Training pipeline: clean -> shuffle -> batch -> prefetch
train_data = (
    train_data
    .map(preprocess_text)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: same cleaning and batching, but no shuffling
val_data = (
    val_data
    .map(preprocess_text)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Text Vectorization
# The vocabulary size feeds both the vectorizer (max_tokens) and the embedding
# table (input_dim); naming it once keeps the two from drifting apart.
VOCAB_SIZE = 10000
SEQUENCE_LENGTH = 250
EMBEDDING_DIM = 16

vectorizer = layers.TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=SEQUENCE_LENGTH,
)
# Build the vocabulary from the training text only (labels are dropped)
vectorizer.adapt(train_data.map(lambda text, label: text))

# Model with regularization
# Raw strings go in; the vectorizer is part of the model, so no separate
# tokenisation step is needed before calling fit/predict.
model = keras.Sequential([
    keras.Input(shape=(1,), dtype=tf.string),
    vectorizer,
    layers.Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM),
    layers.Dropout(0.3),
    layers.GlobalAveragePooling1D(),
    layers.Dense(16, activation="relu", kernel_regularizer=keras.regularizers.l2(0.01)),
    layers.Dense(1, activation="sigmoid"),
])

# Stop when val_loss has not improved for 3 epochs and restore the best weights seen
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,
)

# Model Compile
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Model Training (at most 15 epochs, validated on val_data each epoch)
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=15,
    callbacks=[early_stopping],
)

2025-03-15 19:58:53.850730: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742083133.862292    4437 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742083133.865609    4437 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1742083133.874574    4437 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1742083133.874589    4437 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1742083133.874590    4437 computation_placer.cc:177] computation placer alr

Epoch 1/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.5720 - loss: 0.7823 - val_accuracy: 0.6150 - val_loss: 0.6795
Epoch 2/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.7224 - loss: 0.6467 - val_accuracy: 0.8180 - val_loss: 0.5482
Epoch 3/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8297 - loss: 0.5163 - val_accuracy: 0.8370 - val_loss: 0.4762
Epoch 4/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8526 - loss: 0.4467 - val_accuracy: 0.8328 - val_loss: 0.4441
Epoch 5/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.8647 - loss: 0.4126 - val_accuracy: 0.8608 - val_loss: 0.4082
Epoch 6/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.8745 - loss: 0.3840 - val_accuracy: 0.8668 - val_loss: 0.3894
Epoch 7/15
[1m313/313

In [4]:
# Continue training the existing model
# This cell does not swap in a pretrained embedding: it recompiles `model` with a
# new Adam optimizer and keeps training the weights it already has. The TF-Hub
# gnews-swivel KerasLayer that used to be constructed here was removed because it
# was never attached to `model`, so it had no effect on training or on the saved model.
# If a pretrained embedding is wanted, a new model has to be built around it.

# Model Compile
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
# More patient than the first run: wait 5 epochs without val_loss improvement
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

# Model Training
# Note: this rebinds `history`, replacing the History object from the first training run
history = model.fit(train_data, validation_data=val_data, epochs=15, callbacks=[early_stopping])

Epoch 1/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.9345 - loss: 0.2291 - val_accuracy: 0.8834 - val_loss: 0.3203
Epoch 2/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9323 - loss: 0.2256 - val_accuracy: 0.8810 - val_loss: 0.3199
Epoch 3/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9407 - loss: 0.2159 - val_accuracy: 0.8830 - val_loss: 0.3194
Epoch 4/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9419 - loss: 0.2103 - val_accuracy: 0.8830 - val_loss: 0.3168
Epoch 5/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9402 - loss: 0.2111 - val_accuracy: 0.8818 - val_loss: 0.3191
Epoch 6/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9449 - loss: 0.2013 - val_accuracy: 0.8812 - val_loss: 0.3189
Epoch 7/15
[1m313/313

In [6]:
# Write the trained model to disk as a single .keras file
SAVED_MODEL_PATH = "savedModel.keras"
model.save(SAVED_MODEL_PATH)