# Auto Tagging Support Tickets Using LLM

In the notebook below, I tried zero-shot and few-shot prompts and compared the results. The few-shot prompt, in which I give the LLM multiple examples, was more accurate, and it also helped reduce hallucinations to some degree.

In [None]:
from datasets import load_dataset

# Open the Hugging Face ticket corpus in streaming mode.
ticketing_corpus = load_dataset("Tobi-Bueck/customer-support-tickets", streaming=True)

# Restrict the train split to English tickets, then cap the stream at 200 rows.
english_tickets = ticketing_corpus["train"].filter(
    lambda row: row["language"] == "en"
)
dataset = english_tickets.take(200)

In [None]:
# Materialise the tickets into an in-memory list so later cells can index and slice them.
dataset = [row for row in dataset]

In [None]:
# Show the first ticket (as a one-element list) to inspect the available fields.
dataset[0:1]

In [None]:
def extract_tags(ticket):
    """
    Collect the non-empty tag values of a ticket.

    Args:
        ticket: dict that may contain the keys 'tag_1' ... 'tag_10'

    Returns:
        list: values of the tag fields, in numeric order, skipping fields
        that are missing, None, or the empty string
    """
    found = []
    for idx in range(1, 11):
        value = ticket.get(f"tag_{idx}")
        if value in (None, ""):
            continue  # absent or blank tag slot
        found.append(value)
    return found

# Tag vocabulary: every distinct tag that occurs on any loaded ticket, sorted.
ALL_TAGS = sorted(set().union(*(extract_tags(row) for row in dataset)))

print(f"Total unique tags: {len(ALL_TAGS)}")
print(ALL_TAGS[:10])

In [None]:
def zero_shot_prompt(ticket, tags):
    """
    Build a zero-shot tagging prompt for a single ticket.

    Args:
        ticket: dict with the keys 'queue', 'priority', 'type', 'language',
            'subject' and 'body'
        tags: iterable of tag names (str) the model may choose from

    Returns:
        str: the prompt text, ending with a JSON example of the expected output
    """
    # The output example uses double quotes: the prompt demands a valid JSON
    # array, and single-quoted strings are not valid JSON.
    return f"""
You are a helpdesk ticket tagging system.

Your task is to assign tags to a support ticket based ONLY on the provided information.
Do NOT use external knowledge.
Do NOT invent new tags.

====================
TICKET DETAILS
====================
Queue (Department): {ticket['queue']}
Priority (Urgency): {ticket['priority']}
Ticket Type: {ticket['type']}
Language: {ticket['language']}

Subject:
{ticket['subject']}

Email Body:
{ticket['body']}

====================
TASK
====================
Select the TOP 3 most relevant tags for this ticket.

- Tags represent issue categories (e.g. Account, Outage, Billing, Software Bug).
- Choose ONLY from the available tag list.
- Rank tags from most relevant to least relevant.
- Use at most 3 tags.

====================
AVAILABLE TAGS
====================
{", ".join(tags)}

====================
OUTPUT FORMAT
====================
Return ONLY a valid JSON array.
Example:
["Account", "Alert", "Analytics"]
"""

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "facebook/opt-125m"

# Use half precision only when a CUDA device is present; on a CPU-only host
# fall back to float32 instead of forcing float16.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=model_dtype
)

def generate(prompt, return_full_text=True):
    """
    Generate a completion for `prompt` with the module-level `model` and `tokenizer`.

    Args:
        prompt: prompt text (str)
        return_full_text: when True (default) the decoded text contains the
            prompt followed by the completion; when False only the newly
            generated tokens are decoded

    Returns:
        str: decoded text with special tokens removed
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,
        # temperature only takes effect in sampling mode; without do_sample=True
        # decoding is greedy and the temperature setting is ignored.
        do_sample=True,
        temperature=0.2
    )
    sequence = outputs[0]
    if not return_full_text:
        # The returned sequence starts with the prompt tokens; drop them.
        prompt_length = inputs["input_ids"].shape[1]
        sequence = sequence[prompt_length:]
    return tokenizer.decode(sequence, skip_special_tokens=True)

In [None]:
# Zero-shot baseline: tag the first ticket using the full tag vocabulary.
ticket = dataset[0]
prompt = zero_shot_prompt(ticket=ticket, tags=ALL_TAGS)
zero_shot_output = generate(prompt)
print(zero_shot_output)

In [None]:
def build_few_shot_examples(dataset, n=3):
    """
    Build few-shot examples for small/medium LLMs.

    Each example shows the ticket fields followed by its correct tags encoded
    as a JSON array, matching the output format the prompts ask the model for.

    Args:
        dataset: list of tickets (dict)
        n: number of examples to include (taken from the start of `dataset`)

    Returns:
        str: formatted few-shot examples
    """
    import json  # local import so this cell does not depend on imports in other cells

    examples = []
    for x in dataset[:n]:
        # JSON-encode the tags (double quotes). Interpolating the list directly
        # would render Python's single-quoted repr, which is not valid JSON and
        # contradicts the "valid JSON array" instruction in the prompt.
        tags = json.dumps(extract_tags(x), ensure_ascii=False)
        examples.append(
            f"""
Ticket:
Queue (Department): {x['queue']}
Priority (Urgency): {x['priority']}
Ticket Type: {x['type']}
Language: {x['language']}

Subject:
{x['subject']}

Email Body:
{x['body']}

Correct Tags:
{tags}
"""
        )
    return "\n".join(examples)

In [None]:
def few_shot_prompt(ticket, tags, examples):
    """
    Assemble the few-shot tagging prompt for one ticket.

    Args:
        ticket: dict providing 'queue', 'priority', 'type', 'language',
            'subject' and 'body'
        tags: iterable of tag names (str); joined with ", " in the prompt
        examples: pre-formatted demonstration text, inserted verbatim

    Returns:
        str: the complete prompt text
    """
    # Pull the ticket fields out first so the template below stays readable.
    queue = ticket['queue']
    priority = ticket['priority']
    ticket_type = ticket['type']
    language = ticket['language']
    subject = ticket['subject']
    body = ticket['body']
    available_tags = ", ".join(tags)

    prompt = f"""
You are an expert helpdesk ticket classifier.

Use ONLY the information provided.
Do NOT invent new tags. Choose ONLY from the tag list below.

====================
FEW-SHOT EXAMPLES
====================
{examples}

====================
TICKET TO CLASSIFY
====================
Queue (Department): {queue}
Priority (Urgency): {priority}
Ticket Type: {ticket_type}
Language: {language}

Subject:
{subject}

Email Body:
{body}

====================
TASK
====================
Select the TOP 3 most relevant tags for this ticket.

- Tags represent issue categories (e.g. Account, Outage, Billing, Software Bug).
- Rank tags from most relevant to least relevant.
- Use at most 3 tags.

====================
AVAILABLE TAGS
====================
{available_tags}

====================
OUTPUT FORMAT
====================
Return ONLY a valid JSON array.
Example:
["Account", "Outage", "Disruption"]
"""
    return prompt

In [None]:
# Keep the ticket being classified out of the demonstrations: `ticket` is
# dataset[0], so drawing examples from the start of `dataset` would place its
# own correct tags inside the prompt.
few_shot_pool = [x for x in dataset if x is not ticket]
examples = build_few_shot_examples(few_shot_pool, n=4)
prompt = few_shot_prompt(ticket, ALL_TAGS, examples)

print(generate(prompt))

In [None]:
def format_training_example(x):
    """
    Format a single ticket for instruction fine-tuning.

    The expected tags are written as a JSON array so the training target uses
    the same format the prompts in this notebook ask the model to return.

    Args:
        x: dict representing a ticket

    Returns:
        dict: {"text": formatted instruction + example}
    """
    import json  # local import so this cell does not depend on imports in other cells

    # JSON (double-quoted) rather than Python list repr (single-quoted).
    expected_tags = json.dumps(extract_tags(x), ensure_ascii=False)
    # NOTE(review): the other prompt templates in this notebook never read
    # 'business_type'; fall back to "N/A" in case a ticket lacks that field —
    # confirm against the dataset schema.
    business_type = x.get('business_type', 'N/A')
    return {
        "text": f"""
You are a helpdesk ticket classifier.

Classify the following ticket and assign the TOP 3 most relevant tags.

====================
TICKET DETAILS
====================
Queue (Department): {x['queue']}
Priority (Urgency): {x['priority']}
Ticket Type: {x['type']}
Business Type: {business_type}
Language: {x['language']}

Subject:
{x['subject']}

Email Body:
{x['body']}

====================
EXPECTED TAGS
====================
{expected_tags}
"""
    }

In [None]:
from peft import LoraConfig, get_peft_model
from transformers import Trainer, TrainingArguments

# LoRA adapter settings for causal-LM fine-tuning: rank-8 adapters on the
# modules named q_proj and v_proj, with scaling alpha 16 and 10% adapter dropout.
lora = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

# Rebind `model` to the adapter-wrapped model; later cells (generate, Trainer) use this name.
model = get_peft_model(model, lora)

In [None]:
from transformers import DataCollatorForLanguageModeling

MAX_SEQ_LEN = 512  # tokens per training example; longer tickets are truncated

# Trainer needs tokenised inputs, not raw ticket dicts: render every ticket with
# the instruction template and tokenise the resulting text.
train_dataset = [
    dict(
        tokenizer(
            format_training_example(x)["text"],
            truncation=True,
            max_length=MAX_SEQ_LEN,
        )
    )
    for x in dataset
]

# Pads each batch and derives `labels` from `input_ids` (mlm=False selects the
# causal language-modelling objective).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# fp16 mixed-precision training cannot unscale gradients of fp16 parameters, so
# keep the trainable (adapter) weights in float32 even if the base model was
# loaded in half precision.
for param in model.parameters():
    if param.requires_grad:
        param.data = param.data.float()

training_args = TrainingArguments(
    output_dir="./ticket-tagging",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=torch.cuda.is_available(),  # fp16 mixed precision needs a CUDA device
    logging_steps=10
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer
)

trainer.train()

In [None]:
# Training leaves the model in train mode; switch to eval so the LoRA dropout
# layers are inactive while generating.
model.eval()

# NOTE(review): `ticket` is dataset[0], which was also part of the training
# data, so this is a sanity check rather than a held-out evaluation.
prompt = zero_shot_prompt(ticket, ALL_TAGS)
print(generate(prompt))