In [1]:
!pip install transformers datasets torch scikit-learn openai




In [2]:
import pandas as pd

# Location of the support-ticket CSV on disk.
file_path = "C:\\Users\\Admin\\OneDrive\\Desktop\\customer_support_tickets.csv\\customer_support_tickets.csv"

# Read the file, narrow it to the text/label pair used below, and discard
# every row in which either of the two values is missing.
keep_columns = ["Ticket Description", "Ticket Type"]
df = pd.read_csv(file_path).loc[:, keep_columns].dropna()

# Preview the first rows of the cleaned frame.
df.head()


Unnamed: 0,Ticket Description,Ticket Type
0,I'm having an issue with the {product_purchase...,Technical issue
1,I'm having an issue with the {product_purchase...,Technical issue
2,I'm facing a problem with my {product_purchase...,Technical issue
3,I'm having an issue with the {product_purchase...,Billing inquiry
4,I'm having an issue with the {product_purchase...,Billing inquiry


In [3]:
from transformers import pipeline

# Zero-shot classification pipeline backed by the BART-large MNLI checkpoint.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# The candidate categories are the distinct ticket types present in the data.
ticket_types = df["Ticket Type"]
candidate_labels = ticket_types.unique().tolist()
print("Candidate Labels:", candidate_labels)

# Classify the first ticket as a quick sanity check (single-label mode).
sample_ticket = df["Ticket Description"].iloc[0]
result = classifier(sample_ticket, candidate_labels, multi_label=False)

# Pair each returned label with its score before printing.
label_score_pairs = [
    (label, score) for label, score in zip(result["labels"], result["scores"])
]
print("Ticket:", sample_ticket)
print("Top Predictions:", label_score_pairs)





Device set to use cpu


Candidate Labels: ['Technical issue', 'Billing inquiry', 'Cancellation request', 'Product inquiry', 'Refund request']
Ticket: I'm having an issue with the {product_purchased}. Please assist.

Your billing zip code is: 71701.

We appreciate that you have requested a website address.

Please double check your email address. I've tried troubleshooting steps mentioned in the user manual, but the issue persists.
Top Predictions: [('Product inquiry', 0.40578052401542664), ('Billing inquiry', 0.34699854254722595), ('Technical issue', 0.19952958822250366), ('Refund request', 0.029203880578279495), ('Cancellation request', 0.018487442284822464)]


In [None]:
from sklearn.metrics import accuracy_score, top_k_accuracy_score

# Evaluate the zero-shot classifier on every ticket.
#
# Each ticket is classified exactly once; the ranked label list from that
# single call feeds both the top-1 and the top-3 accuracy, so the (slow)
# model is never run a second time over the dataset.
y_true = []        # ground-truth ticket types
y_pred = []        # top-ranked predicted label per ticket
all_probs = []     # scores per ticket, in the same order as the ranked labels
top3_labels = []   # three highest-ranked labels per ticket

for desc, true_label in zip(df["Ticket Description"], df["Ticket Type"]):
    res = classifier(desc, candidate_labels, multi_label=False)
    ranked_labels = res["labels"]
    y_true.append(true_label)
    y_pred.append(ranked_labels[0])
    all_probs.append(res["scores"])
    top3_labels.append(ranked_labels[:3])

# Accuracy
acc = accuracy_score(y_true, y_pred)

# Top-3 accuracy: the true label appears among the three highest-ranked labels.
# A generator expression is used so that no loop variables are left behind in
# the notebook namespace.
top3_correct = sum(
    expected in candidates for expected, candidates in zip(y_true, top3_labels)
)
top3_acc = top3_correct / len(y_true)

print("Zero-Shot Accuracy:", round(acc, 3))
print("Zero-Shot Top-3 Accuracy:", round(top3_acc, 3))


In [None]:
import inspect

import numpy as np
from transformers import Trainer, TrainingArguments

# NOTE: `model`, `dataset` and `tokenizer` are not created in this cell; they
# must already exist in the kernel when it runs.


def _supported_kwarg(target, preferred, fallback):
    """Return `preferred` if `target` accepts it as a keyword, else `fallback`.

    Used to stay compatible across transformers releases that renamed
    keyword arguments (e.g. `evaluation_strategy` -> `eval_strategy`).
    """
    accepted = inspect.signature(target).parameters
    return preferred if preferred in accepted else fallback


def _compute_accuracy(eval_pred):
    """Compute accuracy from the `(predictions, label_ids)` pair given by Trainer.

    The returned key is "accuracy"; it is the metric that
    `metric_for_best_model="accuracy"` below refers to.
    """
    logits, label_ids = eval_pred
    predicted_ids = np.argmax(logits, axis=-1)
    return {"accuracy": float((predicted_ids == label_ids).mean())}


args = TrainingArguments(
    output_dir="output",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    logging_dir="logs",
    logging_strategy="steps",   # <--- ensures logs appear
    logging_steps=10,           # <--- print every 10 steps
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Evaluate once per epoch; the keyword name depends on the installed release.
    **{_supported_kwarg(TrainingArguments, "eval_strategy", "evaluation_strategy"): "epoch"},
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    # Without a metrics function no "accuracy" value is produced during
    # evaluation, and selecting the best model by accuracy cannot work.
    compute_metrics=_compute_accuracy,
    # Newer releases take the tokenizer as `processing_class`.
    **{_supported_kwarg(Trainer, "processing_class", "tokenizer"): tokenizer},
)

train_result = trainer.train()
print(train_result)


In [None]:
# numpy is imported here because this cell uses `np`; otherwise it would only
# work if a later cell had already been executed.
import numpy as np
from sklearn.metrics import classification_report, accuracy_score

# NOTE: `trainer`, `dataset` and `labels` are not created in this cell; they
# must already exist in the kernel when it runs.

# Get predictions
preds = trainer.predict(dataset["test"])
y_true = preds.label_ids
# Predicted class id = index of the largest score in each row.
y_pred = np.argmax(preds.predictions, axis=1)

# Accuracy
acc = accuracy_score(y_true, y_pred)
print("Fine-Tuned Model Accuracy:", round(acc, 3))

# Classification report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=labels))


In [None]:
import torch
import numpy as np

def get_top3_predictions(texts, model, tokenizer, labels, top_k=3, batch_size=32):
    """Return the `top_k` most probable labels for each text.

    Args:
        texts: Iterable of input strings.
        model: Callable invoked as `model(**encodings)` that returns an object
            with a `.logits` tensor holding one row of class scores per text.
        tokenizer: Callable invoked as
            `tokenizer(batch, truncation=True, padding=True, return_tensors="pt")`
            that returns a mapping of tensors.
        labels: Sequence mapping a class index to its label name.
        top_k: Number of predictions to keep per text.
        batch_size: Number of texts tokenized and scored per forward pass, so
            memory use does not grow with the size of `texts`.

    Returns:
        A list with one dict per input text, in input order:
        `{"Ticket": text, "Top_3_Predictions": [(label, probability), ...]}`,
        with predictions sorted from most to least probable and probabilities
        given as Python floats rounded to 3 decimals. An empty input yields
        an empty list.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    texts = list(texts)  # accept any iterable; slicing below needs a sequence
    if not texts:
        return []
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # Run on whatever device the model's weights live on (CPU if unknown).
    parameters = getattr(model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Switch to eval mode for inference and restore training mode afterwards.
    was_training = bool(getattr(model, "training", False))
    if was_training:
        model.eval()

    results = []
    try:
        with torch.no_grad():
            for start in range(0, len(texts), batch_size):
                batch = texts[start:start + batch_size]
                encodings = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
                encodings = {key: value.to(device) for key, value in encodings.items()}
                outputs = model(**encodings)
                # Bring probabilities back to CPU float32 before converting to numpy.
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1).float().cpu().numpy()

                for text, prob in zip(batch, probs):
                    # argsort is ascending: take the last `top_k` and reverse them.
                    top_indices = prob.argsort()[-top_k:][::-1]
                    top_labels = [labels[j] for j in top_indices]
                    top_probs = [round(float(prob[j]), 3) for j in top_indices]
                    results.append({"Ticket": text, "Top_3_Predictions": list(zip(top_labels, top_probs))})
    finally:
        if was_training:
            model.train()
    return results

# Score every ticket description in the test split.
test_texts = dataset["test"]["Ticket Description"]
top3_results = get_top3_predictions(test_texts, model, tokenizer, labels)

# Print the first five tickets, each followed by its ranked predictions.
separator = "-" * 50
for entry in top3_results[:5]:
    print("Ticket:", entry["Ticket"])
    ranked_lines = [f"  {name}: {score}" for name, score in entry["Top_3_Predictions"]]
    for line in ranked_lines:
        print(line)
    print(separator)
