In [None]:
# Use the %pip magic (not !pip) so packages are installed into the environment
# of the running kernel rather than whatever `pip` is first on the shell PATH.
%pip install -q --upgrade keras-nlp
# Upgrade to Keras 3.
%pip install -q --upgrade keras

In [None]:
# keras-nlp is already installed (with --upgrade) by the first cell, so this is
# normally a no-op. %pip targets the running kernel's environment.
%pip install -q keras-nlp

In [None]:
# Hugging Face `transformers` provides the BertTokenizer used in section 2.
# %pip (not !pip) installs into the running kernel's environment.
%pip install -q transformers

# 1. Inference with a Pre-trained Classifier

In [7]:
import keras_nlp

# Build the sentiment classifier from its published preset; the preset's
# config, vocabulary and weights are downloaded the first time it is used.
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased_sst2"
)

# Score one raw sentence. The string is passed to the model as-is.
predictions = classifier.predict(
    ["Keras-NLP makes NLP easy and accessible!"]
)

# Show the raw scores (one row per input sentence, one column per class).
print(predictions)

Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/config.json...
100%|██████████| 2.14k/2.14k [00:00<00:00, 2.81MB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/assets/tokenizer/vocabulary.txt...
100%|██████████| 226k/226k [00:00<00:00, 1.05MB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/model.weights.h5...
100%|██████████| 16.8M/16.8M [00:00<00:00, 21.5MB/s]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[[-0.6988769  0.5546421]]


# 2. Fine-tuning a Pre-trained Backbone

In [13]:
from transformers import BertTokenizer

# Fetch the tokenizer that ships with the `bert-base-uncased` checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Sentence to break into sub-word pieces.
text = "Keras-NLP makes NLP easy and accessible!"

# Two explicit steps: text -> sub-word strings, then strings -> vocabulary ids.
tokens = tokenizer.tokenize(text)
token_ids = tokenizer.convert_tokens_to_ids(tokens)

# Print the pieces and their ids on separate lines.
print(tokens, token_ids, sep="\n")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

['ke', '##ras', '-', 'nl', '##p', 'makes', 'nl', '##p', 'easy', 'and', 'accessible', '!']
[17710, 8180, 1011, 17953, 2361, 3084, 17953, 2361, 3733, 1998, 7801, 999]


# 3. Fine-tuning with User-controlled Preprocessing

In [None]:
# tensorflow-datasets supplies the GLUE/SST-2 splits loaded in the next cell.
# %pip (not !pip) installs into the running kernel's environment.
%pip install -q tensorflow-datasets

In [54]:
import tensorflow as tf
import tensorflow_datasets as tfds
import keras_nlp

def prepare_features(examples):
    """Turn one example mapping into a ``(sentence, label)`` pair.

    Args:
        examples: A mapping that has ``'sentence'`` and ``'label'`` entries.

    Returns:
        A 2-tuple ``(examples['sentence'], examples['label'])``.
    """
    sentence = examples['sentence']
    label = examples['label']
    return sentence, label

# Build small train/validation pipelines from GLUE SST-2. Each split is mapped
# to (sentence, label) pairs, batched by 16, then capped at a fixed number of
# batches so the run stays short.
ds_train = (
    tfds.load('glue/sst2', split='train')
    .map(prepare_features)
    .batch(16)
    .take(100)  # at most 100 batches
)
ds_test = (
    tfds.load('glue/sst2', split='validation')
    .map(prepare_features)
    .batch(16)
    .take(25)  # at most 25 batches
)

# Create the classifier from the tiny BERT preset with a 2-class output.
pretrained_classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased",
    num_classes=2,
)
pretrained_classifier.compile(
    optimizer='adam',
    # from_logits=True: the loss is told the model outputs are unnormalised.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for one epoch, evaluating on the validation subset at the end.
pretrained_classifier.fit(ds_train, validation_data=ds_test, epochs=1)


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 2s/step - accuracy: 0.5943 - loss: 0.6700 - val_accuracy: 0.6975 - val_loss: 0.5857


<keras.src.callbacks.history.History at 0x7dd5003a6b30>

The model is learning and improving its ability to classify the given text data, as evidenced by the accuracy metrics: validation accuracy reaches about 0.70 after a single epoch on a small subset of the data. If training time is a critical factor for your project, consider adjusting the batch size, the number of batches taken from each split, and the number of epochs to find a good balance between speed and performance that fits your specific requirements and resources.

# 4. Fine-tuning a Custom Model

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# NOTE(review): X_train, y_train, X_test and y_test are not defined in any
# visible cell of this notebook, so this cell cannot run on a fresh kernel
# until an earlier cell creates them. Presumably X_* are integer token-id
# sequences (ids < vocab_size) and y_* are integer class labels in [0, 4)
# -- TODO confirm against the data-preparation step.

# Vocabulary size for the embedding table. The `tokenizer` created earlier in
# this notebook is a Hugging Face BertTokenizer, which exposes `vocab_size`
# (it has no Keras-style `word_index` attribute).
vocab_size = tokenizer.vocab_size

# Define a simple LSTM model
custom_model = Sequential()
# No `input_length`: it is deprecated in Keras 3 and the sequence length is
# taken from the data passed to fit().
custom_model.add(Embedding(input_dim=vocab_size, output_dim=100))
custom_model.add(LSTM(128, return_sequences=True))
custom_model.add(Dropout(0.2))
custom_model.add(LSTM(64))
custom_model.add(Dense(16, activation='relu'))
custom_model.add(Dense(4, activation='softmax'))  # 4 classes

# Sparse loss: labels are expected as integer class ids, not one-hot vectors.
custom_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the custom model (adjust epochs and batch_size as needed)
custom_model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Make predictions (one row of class probabilities per test example)
predictions = custom_model.predict(X_test)

# Convert these predictions into class labels
predicted_classes = np.argmax(predictions, axis=1)

# Accuracy = fraction of test examples whose predicted class matches the label.
# Computed with NumPy so no extra dependency is required; ravel() guards
# against labels stored as a column vector.
accuracy = float(np.mean(predicted_classes == np.asarray(y_test).ravel()))
print(f"Custom Model Accuracy: {accuracy * 100:.2f}%")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Custom Model Accuracy: 87.78%
