In [None]:
# Code demo for the business case.

In [None]:
# Install these libraries if not dopne already. 
!pip install transformers torch 

In [7]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, pipeline
import torch
from torch.utils.data import DataLoader, Dataset
import numpy as np

# Define sample data
data = [
    {"text": "I want to return my order", "label": 0},  # returns
    {"text": "Can I know the delivery status?", "label": 1},  # product inquiries
    {"text": "My product is defective", "label": 2}  # complaints
]

# Map label indices to categories
label_map = {0: "returns", 1: "product inquiries", 2: "complaints"}

# Load pre-trained BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Tokenize and preprocess data
class SampleDataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        inputs = self.tokenizer(item["text"], truncation=True, padding='max_length', max_length=32, return_tensors="pt")
        inputs = {k: v.squeeze() for k, v in inputs.items()}  # Squeeze to remove extra dimension
        inputs['labels'] = torch.tensor(item["label"], dtype=torch.long)
        return inputs

# Create Dataset and DataLoader
dataset = SampleDataset(data, tokenizer)

# Set up minimal training for fine-tuning
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="no",
    per_device_train_batch_size=2,
    num_train_epochs=20,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

# Train the model (this is minimal; actual fine-tuning requires more data and epochs)
trainer.train()

# Use Hugging Face pipeline for prediction
nlp_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Predict categories for new queries
new_queries = ["I received the wrong item", "Where is my package?", "Can I return this?", "I received a defective product"]
for query in new_queries:
    result = nlp_pipeline(query)[0]
    predicted_label = int(result['label'].split('_')[-1])  # Get label index from 'LABEL_0', 'LABEL_1', etc.
    category = label_map[predicted_label]  # Map label to category
    print(f"Query: '{query}'\nPredicted Category: {category}\n")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Step,Training Loss


Query: 'I received the wrong item'
Predicted Category: complaints

Query: 'Where is my package?'
Predicted Category: product inquiries

Query: 'Can I return this?'
Predicted Category: product inquiries

Query: 'I received a defective product'
Predicted Category: complaints

