In [1]:
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import pandas as pd

# Load data
data = pd.read_json('Intent.json')
sentences = [item for sublist in data['intents'] for item in sublist['text']]
labels = [idx for idx, sublist in enumerate(data['intents']) for _ in sublist['text']]

# Encode labels
num_labels = len(data['intents'])
labels = tf.keras.utils.to_categorical(labels, num_classes=num_labels)

# Split data
train_texts, val_texts, train_labels, val_labels = train_test_split(sentences, labels, test_size=0.2)

# Tokenize
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128, return_tensors="tf")
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128, return_tensors="tf")

# Load model
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)

# Optimizer and loss function
optimizer = Adam(learning_rate=5e-5)
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Prepare dataset
train_dataset = tf.data.Dataset.from_tensor_slices((
    {'input_ids': train_encodings['input_ids'], 'attention_mask': train_encodings['attention_mask']},
    train_labels
)).shuffle(len(train_texts)).batch(16)

val_dataset = tf.data.Dataset.from_tensor_slices((
    {'input_ids': val_encodings['input_ids'], 'attention_mask': val_encodings['attention_mask']},
    val_labels
)).batch(16)

# Training loop
epochs = 5
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    # Training
    for batch in train_dataset:
        with tf.GradientTape() as tape:
            outputs = model(batch[0], training=True)
            loss = loss_fn(batch[1], outputs.logits)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    print(f"Training loss: {loss.numpy()}")

    # Validation
    val_loss = 0
    for batch in val_dataset:
        outputs = model(batch[0], training=False)
        val_loss += loss_fn(batch[1], outputs.logits).numpy()
    val_loss /= len(val_dataset)
    print(f"Validation loss: {val_loss}")


  from .autonotebook import tqdm as notebook_tqdm






All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Training loss: 3.3273134231567383
Validation loss: 3.3252113130357532
Epoch 2/5
Training loss: 2.5487217903137207
Validation loss: 2.4756075541178384
Epoch 3/5
Training loss: 1.622786283493042
Validation loss: 1.7161619398328993
Epoch 4/5
Training loss: 1.29448401927948
Validation loss: 1.2113480303022597
Epoch 5/5
Training loss: 0.7105638384819031
Validation loss: 0.943240225315094


In [2]:
# Validation
val_loss = 0
correct_predictions = 0
total_predictions = 0

for batch in val_dataset:
    outputs = model(batch[0], training=False)
    val_loss += loss_fn(batch[1], outputs.logits).numpy()

    # Compute predictions and accuracy
    predictions = tf.argmax(outputs.logits, axis=1)
    labels = tf.argmax(batch[1], axis=1)
    correct_predictions += tf.reduce_sum(tf.cast(predictions == labels, tf.float32)).numpy()
    total_predictions += labels.shape[0]

val_loss /= len(val_dataset)
accuracy = correct_predictions / total_predictions
print(f"Validation loss: {val_loss}, Accuracy: {accuracy}")


Validation loss: 0.943240225315094, Accuracy: 0.7651515151515151


In [3]:
# Function to test the model with a single input
def test_model(sentence):
    # Tokenize the input
    encoding = tokenizer(sentence, truncation=True, padding=True, max_length=128, return_tensors="tf")
    outputs = model(encoding)
    logits = outputs.logits

    # Get predicted class
    predicted_class = tf.argmax(logits, axis=1).numpy()[0]
    return predicted_class

# Test an example sentence
example_sentence = "The lack of taste is making it hard to enjoy anything, even drinks."
predicted_class = test_model(example_sentence)

# Map the predicted class to the intent
intent = data['intents'][predicted_class]['intent']
print(f"Predicted intent: {intent}")


Predicted intent: Loss of Taste


In [4]:
import pickle

In [5]:
# Map the predicted class to the intent
intent = data['intents'][predicted_class]['intent']
print(f"Predicted intent: {intent}")

# Save the model and tokenizer
model.save_pretrained('./saved_model')
tokenizer.save_pretrained('./saved_model')

# Pickle the label mappings
label_mapping = {i: intent['intent'] for i, intent in enumerate(data['intents'])}
with open('label_mapping.pkl', 'wb') as f:
    pickle.dump(label_mapping, f)

print("Model and label mapping saved successfully!")

Predicted intent: Loss of Taste
Model and label mapping saved successfully!
