In [2]:
import pandas as pd
import numpy as np

In [3]:
data=[
    {"payment": "I want to make a payment using Khalti."},
    {"payment": "Can I pay through eSewa?"},
    {"payment": "How do I use my debit card for payment?"},
    {"payment": "Is it possible to pay on delivery?"},
    {"payment": "Do you accept payments via Fonepay?"},
    {"payment": "Can I use my credit card for the payment?"},
    {"payment": "What payment options are available?"},
    {"payment": "How can I pay using IME Pay?"},
    {"payment": "Can I use mobile banking for the payment?"},
    {"payment": "Is there an option to pay via PayPal?"},
     {"payment": "Can I use mobile banking for the payment?"},
    {"payment": "Is there an option to pay via PayPal?"},
    {"payment": "Can I pay using ConnectIPS?"},
    {"payment": "Is there any discount for using digital payment?"},
    {"payment": "How do I pay using my mobile wallet?"},
    {"payment": "Can I split the payment between cash and card?"},
    {"payment": "Can I pay using a QR code?"},
    {"payment": "Is Google Pay accepted for payment?"},
    {"payment": "Do you have an option for EMI payments?"},
    {"payment": "Can I pay using my international credit card?"},
    {"payment": "Do you accept Bitcoin or other cryptocurrencies?"},
    {"payment": "Can I pay in installments?"},
    {"cancel_order": "I would like to cancel my order for chicken momo."},
    {"cancel_order": "Can I cancel the order I just placed?"},
    {"cancel_order": "How do I cancel my order for chatamari?"},
    {"cancel_order": "Please cancel my order for sel roti."},
    {"cancel_order": "I need to cancel my order for thukpa."},
    {"cancel_order": "Is it possible to cancel my order now?"},
    {"cancel_order": "I want to cancel my order for samosa."},
    {"cancel_order": "How can I cancel the order for bara?"},
    {"cancel_order": "Please cancel my order for yomari."},
    {"cancel_order": "I need to cancel my order for pani puri."},
      {"cancel_order": "Can I cancel my order for lassi?"},
    {"cancel_order": "I changed my mind, please cancel my order for chatpate."},
    {"cancel_order": "Is it too late to cancel my order for gundruk?"},
    {"cancel_order": "I want to cancel my order for chatamari immediately."},
    {"cancel_order": "I ordered the wrong item, can I cancel it?"},
    {"cancel_order": "How do I cancel my order for sel roti online?"},
    {"cancel_order": "I accidentally placed an order, please cancel it."},
    {"cancel_order": "Please cancel my entire order."},
    {"cancel_order": "Can I cancel my order and get a refund?"},
    {"cancel_order": "I need to cancel my order for kwati."},
    {"food_queries": "I want to know the price of chicken momo."},
    {"food_queries": "What ingredients are used in chatamari?"},
    {"food_queries": "Do you have vegetarian options for thukpa?"},
    {"food_queries": "Can you tell me the price of sel roti?"},
    {"food_queries": "Is samosa available right now?"},
    {"food_queries": "What are the popular street foods in Kathmandu?"},
    {"food_queries": "Can I get more information about yomari?"},
    {"food_queries": "What is the price of a plate of pani puri?"},
    {"food_queries": "Do you have any discounts on bara?"},
    {"food_queries": "Are there any vegan options available?"},
    {"food_queries": "What is the price of a plate of pani puri?"},
    {"food_queries": "Do you have any discounts on bara?"},
    {"food_queries": "Are there any vegan options available?"},
    {"food_queries": "Do you serve jhol momo?"},
    {"food_queries": "Is chatpate available for delivery?"},
    {"food_queries": "What are the ingredients in laphing?"},
    {"food_queries": "Do you offer delivery for kwati?"},
    {"food_queries": "What is the cost of a lassi?"},
    {"food_queries": "Can you tell me more about gundruk?"},
    {"food_queries": "Do you have any special offers on sel roti?"},
    {"food_queries": "Are there gluten-free options available?"},
    {"food_queries": "How spicy is your thukpa?"},
    {"food_queries": "What is the best time to order samosa?"}
]


In [4]:
# Transform the data into the desired format
formatted_data = []
for item in data:
    for category, message in item.items():
        formatted_data.append({"Category": category, "Message": message})

# Create DataFrame from the formatted data
df = pd.DataFrame(formatted_data)


In [7]:
df

Unnamed: 0,Category,Message
0,payment,I want to make a payment using Khalti.
1,payment,Can I pay through eSewa?
2,payment,How do I use my debit card for payment?
3,payment,Is it possible to pay on delivery?
4,payment,Do you accept payments via Fonepay?
...,...,...
60,food_queries,Can you tell me more about gundruk?
61,food_queries,Do you have any special offers on sel roti?
62,food_queries,Are there gluten-free options available?
63,food_queries,How spicy is your thukpa?


In [5]:
df_input=df[['Message']]
df_output=df[['Category']]
df_input

In [6]:
from sklearn.model_selection import train_test_split
train_df, test_df=train_test_split(df, test_size=0.2, random_state=42, stratify=df['Category'])

In [8]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def preprocess_data(data):
    return tokenizer(
        data['Message'].tolist(),
        padding=True,
        truncation=True,
        return_tensors='pt'
    )

train_encodings = preprocess_data(train_df)
test_encodings = preprocess_data(test_df)

# Convert labels to indices
label_to_id = {label: idx for idx, label in enumerate(df['Category'].unique())}
train_labels = [label_to_id[label] for label in train_df['Category']]
test_labels = [label_to_id[label] for label in test_df['Category']]

In [10]:
import tensorflow as tf

train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_labels
)).shuffle(1000).batch(16)

test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    test_labels
)).batch(16)


In [12]:
from transformers import TFBertModel
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy

class TFBertClassifier(tf.keras.Model):
    def __init__(self, num_classes):
        super(TFBertClassifier, self).__init__()
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')
        self.dropout = Dropout(0.3)
        self.classifier = Dense(num_classes, activation='softmax')

    def call(self, inputs):
        input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask']
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        dropout_output = self.dropout(pooled_output)
        return self.classifier(dropout_output)

# Initialize the model, optimizer, and loss function
num_classes = len(label_to_id)
model = TFBertClassifier(num_classes=num_classes)

optimizer = Adam(learning_rate=2e-5)
loss = SparseCategoricalCrossentropy()
metric = SparseCategoricalAccuracy()

# Compile the model
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

# Train the model
model.fit(train_dataset, validation_data=test_dataset, epochs=5)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7e0e5e4d3460>

# Inference

In [15]:
def preprocess_and_encode(text, tokenizer):
    # Tokenize the text
    encodings = tokenizer(
        [text],
        padding=True,
        truncation=True,
        return_tensors='tf'
    )
    return encodings

def predict(text, model, tokenizer, label_to_id):
    # Preprocess and encode the input text
    encodings = preprocess_and_encode(text, tokenizer)

    # Extract tensors from the encodings
    input_ids = encodings['input_ids']
    attention_mask = encodings['attention_mask']

    # Make predictions
    predictions = model.predict({'input_ids': input_ids, 'attention_mask': attention_mask})

    # Decode the predictions to get the category labels
    predicted_label_id = tf.argmax(predictions, axis=1).numpy()[0]
    id_to_label = {v: k for k, v in label_to_id.items()}
    predicted_label = id_to_label[predicted_label_id]

    return predicted_label


In [16]:
# Test the prediction function with a few examples
examples = [
    "How do I cancel my order for chatamari?",
    "I want to make a payment using eSewa.",
    "What is the price of chicken momo?"
]

for example in examples:
    predicted_label = predict(example, model, tokenizer, label_to_id)
    print(f"Input: {example}")
    print(f"Predicted Category: {predicted_label}")
    print()


Input: How do I cancel my order for chatamari?
Predicted Category: cancel_order

Input: I want to make a payment using eSewa.
Predicted Category: payment

Input: What is the price of chicken momo?
Predicted Category: food_queries

