In [None]:
!pip install -q --upgrade keras-nlp
!pip install -q --upgrade keras  # Upgrade to Keras 3.

In [2]:
import os

# Select the Keras backend through the environment. This assignment is placed
# before the keras / keras_nlp imports below; keep that order, because the
# backend choice is read when Keras is first imported.
os.environ["KERAS_BACKEND"] = "jax"  # or "tensorflow" or "torch"

import keras_nlp
import keras

# Use mixed precision to speed up all training in this guide.
# The policy is set globally, so it applies to every model built later in the
# notebook (the float16 prediction array shown further down reflects this).
keras.mixed_precision.set_global_policy("mixed_float16")

## Retrieving Sentiment Data and Preprocessing

In [4]:
import pandas as pd

# Load the dataset
dataset_path = '/content/Sentiment_NLP.csv'
df = pd.read_csv(dataset_path)

# Fail early, with a clear message, if the two columns the rest of the
# notebook relies on are not present in the file.
assert {'text', 'sentiment'} <= set(df.columns), (
    f"expected 'text' and 'sentiment' columns, found {list(df.columns)}"
)

# Show the first few rows to understand the structure. A bare expression at
# the end of the cell uses the notebook's rich DataFrame display.
df.head()

                                                text  sentiment
0  So there is no way for me to plug it in here i...          0
1                         Good case Excellent value.          1
2                             Great for the jawbone.          1
3  Tied to charger for conversations lasting more...          0
4                                  The mic is great.          1


In [5]:
from sklearn.model_selection import train_test_split

# The DataFrame has a 'text' column (the review text) and a 'sentiment' column
# (the class label), as shown by the df.head() preview above.
texts = df['text'].values
labels = df['sentiment'].values  # integer class ids; the preview shows 0 and 1

# Hold out 20% of the rows for testing. random_state is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

In [6]:
import tensorflow as tf

def make_dataset(data, labels, batch_size=16, shuffle=False, seed=None):
    """Build a batched `tf.data.Dataset` of `(data, labels)` pairs.

    Args:
        data: Sequence/array of examples (here: raw review strings).
        labels: Sequence/array of labels, aligned with `data`.
        batch_size: Number of examples per batch.
        shuffle: If True, shuffle the examples (before batching) and reshuffle
            on every pass over the dataset. Defaults to False, which keeps the
            original fixed order.
        seed: Optional seed for the shuffle, for reproducible ordering.

    Returns:
        A `tf.data.Dataset` yielding `(data_batch, labels_batch)` tuples.
    """
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))
    if shuffle:
        # Buffer covers the whole dataset so the shuffle is uniform; max(..., 1)
        # keeps the buffer size valid when `data` is empty.
        dataset = dataset.shuffle(
            buffer_size=max(len(data), 1),
            seed=seed,
            reshuffle_each_iteration=True,
        )
    dataset = dataset.batch(batch_size)
    return dataset

# Create training and testing datasets
BATCH_SIZE = 16
# Only the training set is shuffled, so each epoch sees the examples in a
# different order; the test set keeps a fixed order for evaluation.
train_dataset = make_dataset(X_train, y_train, BATCH_SIZE, shuffle=True, seed=42)
test_dataset = make_dataset(X_test, y_test, BATCH_SIZE)

## Inference from a pretrained Classifier

Outputs are the raw logits per class, ordered [negative, positive] for binary classification. For example, logits of [0, 0] correspond to a 50% chance of positive.

In [None]:
# Load the `bert_tiny_en_uncased_sst2` classifier preset.
sst2_preset = "bert_tiny_en_uncased_sst2"
classifier = keras_nlp.models.BertClassifier.from_preset(sst2_preset)

# predict() expects batched input, so the single sentence is wrapped in a list.
sample_batch = ["I not only like playing hockey, I love it!"]
classifier.predict(sample_batch)

Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/config.json...
100%|██████████| 2.14k/2.14k [00:00<00:00, 353kB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/assets/tokenizer/vocabulary.txt...
100%|██████████| 226k/226k [00:00<00:00, 2.20MB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/model.weights.h5...
100%|██████████| 16.8M/16.8M [00:00<00:00, 43.9MB/s]
  trackable.load_own_variables(weights_store.get(inner_path))
  trackable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


array([[ 0.5522, -0.4211]], dtype=float16)

The sentence above is clearly positive, but the prediction leans towards the negative class, possibly because of the word 'not' in the sentence.

In [None]:
# Evaluate the pretrained classifier on the held-out test batches; the result
# shown below is [loss, sparse_categorical_accuracy].
classifier.evaluate(test_dataset)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 988ms/step - loss: 0.4555 - sparse_categorical_accuracy: 0.8031


[0.4458002746105194, 0.8120300769805908]

About 81% accuracy without any fine-tuning on this dataset is really good: the pretrained model generalizes well to these reviews.

## Finetuning a Pre-trained Backbone

All the backbone presets are listed here: https://keras.io/api/keras_nlp/models/

As bert_tiny_en_uncased is the smallest, we'll finetune it.

In [None]:
# Build a classifier from the "bert_tiny_en_uncased" preset with an output for
# each of the two sentiment classes. This rebinds `classifier`, replacing the
# SST-2 model used in the previous section.
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased",
    num_classes=2
)

Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased/2/download/config.json...
100%|██████████| 507/507 [00:00<00:00, 218kB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased/2/download/model.weights.h5...
100%|██████████| 16.8M/16.8M [00:00<00:00, 46.9MB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased/2/download/tokenizer.json...
100%|██████████| 547/547 [00:00<00:00, 1.03MB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased/2/download/assets/tokenizer/vocabulary.txt...
100%|██████████| 226k/226k [00:00<00:00, 2.13MB/s]


In [None]:
# Fine-tune on the training batches. The held-out test split doubles as the
# validation data here, so the val_* metrics are measured on the same data that
# was passed to evaluate() earlier.
classifier.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=10
)

Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 4s/step - loss: 0.6406 - sparse_categorical_accuracy: 0.6756 - val_loss: 0.5437 - val_sparse_categorical_accuracy: 0.7594
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m326s[0m 3s/step - loss: 0.5062 - sparse_categorical_accuracy: 0.7853 - val_loss: 0.3929 - val_sparse_categorical_accuracy: 0.8271
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 3s/step - loss: 0.3662 - sparse_categorical_accuracy: 0.8621 - val_loss: 0.3734 - val_sparse_categorical_accuracy: 0.8471
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 3s/step - loss: 0.3023 - sparse_categorical_accuracy: 0.8825 - val_loss: 0.3467 - val_sparse_categorical_accuracy: 0.8596
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 3s/step - loss: 0.2366 - sparse_categorical_accuracy: 0.9124 - val_loss: 0.3521 - val_sparse_categorical_accuracy: 0.

<keras.src.callbacks.history.History at 0x794a18baf310>

We achieved a validation accuracy of ~87% after only 6 epochs, which is good progress.

## Finetuning with User-Controlled Preprocessing

Here we use the `bert_large_en_uncased` preset to preprocess the data. In principle any of the backbone presets can be chosen (https://keras.io/api/keras_nlp/models/), as long as its tokenizer vocabulary matches the backbone that is fine-tuned.

In [7]:
import tensorflow as tf

# Define the preprocessor that is applied to the datasets ahead of training.
# NOTE(review): the preset here is "bert_large_en_uncased", while the
# classifiers in this notebook use "bert_tiny_en_uncased". The download logs
# list a 226k vocabulary.txt for both presets, so presumably the vocabularies
# are identical — confirm before changing either preset.
# NOTE(review): sequence_length=512 is passed for every example; TODO confirm
# that such a long length is needed for these short reviews.
preprocessor = keras_nlp.models.BertPreprocessor.from_preset(
    "bert_large_en_uncased",
    sequence_length=512,
)

Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_large_en_uncased/2/download/tokenizer.json...
100%|██████████| 547/547 [00:00<00:00, 242kB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_large_en_uncased/2/download/assets/tokenizer/vocabulary.txt...
100%|██████████| 226k/226k [00:00<00:00, 2.06MB/s]


In [22]:
def _preprocess_and_cache(dataset):
    """Run `preprocessor` over `dataset`, then cache and prefetch the result."""
    tokenized = dataset.map(preprocessor, tf.data.AUTOTUNE)
    return tokenized.cache().prefetch(tf.data.AUTOTUNE)

# Apply the same preprocessing pipeline to the train and test sets.
train = _preprocess_and_cache(train_dataset)
test = _preprocess_and_cache(test_dataset)

In [23]:
# Keep the classifier the same as before for a fair comparison.
# preprocessor=None is passed because the `train` / `test` datasets built above
# have already been run through `preprocessor`.
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased", preprocessor=None, num_classes=2
)

In [18]:
# Train the classifier on the preprocessed training data, using the
# preprocessed test split as validation data.
classifier.fit(
    train,
    validation_data = test,
    epochs=6,
)

Epoch 1/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 4s/step - loss: 0.6966 - sparse_categorical_accuracy: 0.5163 - val_loss: 0.6725 - val_sparse_categorical_accuracy: 0.6065
Epoch 2/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 3s/step - loss: 0.6527 - sparse_categorical_accuracy: 0.6757 - val_loss: 0.5444 - val_sparse_categorical_accuracy: 0.7644
Epoch 3/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 4s/step - loss: 0.5192 - sparse_categorical_accuracy: 0.7895 - val_loss: 0.3999 - val_sparse_categorical_accuracy: 0.8471
Epoch 4/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m347s[0m 3s/step - loss: 0.4277 - sparse_categorical_accuracy: 0.8223 - val_loss: 0.3370 - val_sparse_categorical_accuracy: 0.8672
Epoch 5/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 3s/step - loss: 0.3654 - sparse_categorical_accuracy: 0.8603 - val_loss: 0.3246 - val_sparse_categorical_accuracy: 0.8747


<keras.src.callbacks.history.History at 0x7e27183db160>

The performance went up by ~2.5 percent for the same number of epochs, which is good progress.

## Finetuning a Custom Model.

In [24]:
# Keep the backbone and preprocessor the same as before so the results can be
# compared with the other approaches.
backbone = keras_nlp.models.BertBackbone.from_preset("bert_tiny_en_uncased")

# Freeze the backbone; only the layers added below remain trainable.
backbone.trainable = False
inputs = backbone.input
sequence = backbone(inputs)["sequence_output"]
# Stack two new Transformer encoder layers on top of the frozen backbone output.
# NOTE(review): no padding_mask is passed to these layers — confirm whether
# padded positions are masked implicitly or need to be masked explicitly.
for _ in range(2):
    sequence = keras_nlp.layers.TransformerEncoder(
        num_heads=4,  # attention heads in each added layer
        intermediate_dim=256,  # intermediate (feed-forward) dimension of each added layer
        dropout=0.2,  # dropout rate inside each added layer
    )(sequence)

# Select the position given by backbone.cls_token_index and apply dropout
# before the output layer for regularization.
sequence = keras.layers.Dropout(0.5)(sequence[:, backbone.cls_token_index, :])
# Two output units, one per class; no activation is applied, so these are
# logits (the loss in the compile step is created with from_logits=True).
outputs = keras.layers.Dense(2)(sequence)

In [25]:
# Wrap the graph built above into a trainable Keras model.
model = keras.Model(inputs, outputs)
model.compile(
    # Labels are integer class ids and the Dense output layer has no activation,
    # hence the sparse loss with from_logits=True.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(1e-4),  # Adam with a learning rate of 1e-4
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    # Request JIT-compiled execution. NOTE(review): the backend selected at the
    # top of this notebook is JAX, not TensorFlow — confirm jit_compile is
    # supported on the backend actually in use.
    jit_compile=True,
)

In [26]:
# Print the model summary to inspect the layers and parameter counts.
model.summary()

In [27]:
# Train the custom model on the same preprocessed `train` / `test` datasets
# that were used for the previous classifier.
model.fit(
    train,
    validation_data=test,
    epochs=6,
)

Epoch 1/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m631s[0m 6s/step - loss: 1.1358 - sparse_categorical_accuracy: 0.5074 - val_loss: 0.6148 - val_sparse_categorical_accuracy: 0.6466
Epoch 2/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m592s[0m 6s/step - loss: 0.8394 - sparse_categorical_accuracy: 0.5799 - val_loss: 0.5671 - val_sparse_categorical_accuracy: 0.6967
Epoch 3/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m588s[0m 6s/step - loss: 0.7152 - sparse_categorical_accuracy: 0.6286 - val_loss: 0.5496 - val_sparse_categorical_accuracy: 0.7093
Epoch 4/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m546s[0m 5s/step - loss: 0.7134 - sparse_categorical_accuracy: 0.6168 - val_loss: 0.5342 - val_sparse_categorical_accuracy: 0.7218
Epoch 5/6
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m557s[0m 6s/step - loss: 0.6524 - sparse_categorical_accuracy: 0.6526 - val_loss: 0.5247 - val_sparse_categorical_accuracy: 0.7168


<keras.src.callbacks.history.History at 0x79eda9943640>

At first glance the performance looks much worse (about a 15 percentage point drop from the previous approach), but it is decent considering the similar training time and that only about 5% of the parameters are trainable.