## Load and Clean Data

In [3]:
# Import required libraries
import pandas as pd

# Load the dataset
file_path = "customer_support_tickets.csv"  # If in same directory
df = pd.read_csv(file_path)


# Keep only the columns we care about
df = df[['Ticket ID', 'Ticket Type', 'Ticket Description']]

# Drop rows with missing values in essential columns
df = df.dropna(subset=['Ticket Type', 'Ticket Description'])

# Remove duplicates (if any)
df = df.drop_duplicates()

# Reset index
df = df.reset_index(drop=True)


print("Total Tickets:", len(df))
print("Unique Tags (Ticket Types):", df['Ticket Type'].nunique())
print("\nSample Ticket:\n")
print(df.iloc[0])


Total Tickets: 8469
Unique Tags (Ticket Types): 5

Sample Ticket:

Ticket ID                                                             1
Ticket Type                                             Technical issue
Ticket Description    I'm having an issue with the {product_purchase...
Name: 0, dtype: object


##  Zero-Shot LLM Classification

In [5]:
from transformers import pipeline

# Load model
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Get unique ticket types (tags)
candidate_labels = df["Ticket Type"].unique().tolist()

# Classify a small sample (e.g. first 5 tickets)
results = []
for _, row in df.head(5).iterrows():
    description = row["Ticket Description"]
    actual_tag = row["Ticket Type"]
    
    prediction = classifier(description, candidate_labels, multi_label=True)
    top3 = sorted(zip(prediction['labels'], prediction['scores']), key=lambda x: -x[1])[:3]
    
    results.append({
        "Ticket": description[:150] + "...",  # shorten for display
        "Actual Tag": actual_tag,
        "Top 3 Predicted Tags": top3
    })

# Convert results to DataFrame
pd.DataFrame(results)







Unnamed: 0,Ticket,Actual Tag,Top 3 Predicted Tags
0,I'm having an issue with the {product_purchase...,Technical issue,"[(Technical issue, 0.8724848031997681), (Billi..."
1,I'm having an issue with the {product_purchase...,Technical issue,"[(Product inquiry, 0.9132856130599976), (Techn..."
2,I'm facing a problem with my {product_purchase...,Technical issue,"[(Product inquiry, 0.8779209852218628), (Techn..."
3,I'm having an issue with the {product_purchase...,Billing inquiry,"[(Product inquiry, 0.7847293019294739), (Techn..."
4,I'm having an issue with the {product_purchase...,Billing inquiry,"[(Product inquiry, 0.9413691163063049), (Techn..."


In [10]:
# !pip install -q transformers



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


##  Load flan-t5-base (small and CPU-optimized)

In [7]:
# !pip install -q transformers

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_id = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## Few-Shot Prompt + Inference Code

In [14]:
def classify_ticket(ticket):
    prompt = f"""
Classify the following support ticket into one of the following categories:
- Account access
- Billing inquiry
- Technical issue

Ticket: "{ticket}"
Category:"""

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=20)
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    return prediction


### Test on First 5 Tickets

In [16]:
for i in range(5):
    ticket = df.loc[i, 'Ticket Description']
    actual = df.loc[i, 'Ticket Type']
    predicted = classify_ticket(ticket)
    
    print(f"\n🎫 Ticket: {ticket[:80]}...")
    print(f"✅ Actual: {actual}")
    print(f"🤖 Predicted: {predicted}")



🎫 Ticket: I'm having an issue with the {product_purchased}. Please assist.

Your billing z...
✅ Actual: Technical issue
🤖 Predicted: Technical issue

🎫 Ticket: I'm having an issue with the {product_purchased}. Please assist.

If you need to...
✅ Actual: Technical issue
🤖 Predicted: Technical issue

🎫 Ticket: I'm facing a problem with my {product_purchased}. The {product_purchased} is not...
✅ Actual: Technical issue
🤖 Predicted: Technical issue

🎫 Ticket: I'm having an issue with the {product_purchased}. Please assist.

If you have a ...
✅ Actual: Billing inquiry
🤖 Predicted: Technical issue

🎫 Ticket: I'm having an issue with the {product_purchased}. Please assist.


Note: The sel...
✅ Actual: Billing inquiry
🤖 Predicted: Technical issue
