In [2]:
# --- TASK 5: ZERO-SHOT ONLY (Speed Run) ---

# # 1. Install Dependencies
# !pip install transformers torch pandas scikit-learn -q

import pandas as pd
from transformers import pipeline
from sklearn.metrics import accuracy_score

# ==========================================
# üõ†Ô∏è STEP 1: LOAD & PREPARE DATA
# ==========================================
filename = "tickets-dataset.csv"

print(f"‚è≥ Loading {filename}...")
try:
    df = pd.read_csv(filename)

    # 1. Filter for English ('en') only
    df = df[df['language'] == 'en'].copy()

    # 2. Fix Missing Values (The Cause of previous errors)
    df['subject'] = df['subject'].fillna('')
    df['body'] = df['body'].fillna('')

    # 3. Create Input Text & Truncate
    # We take the first 512 characters to avoid "Model too long" errors
    df['text'] = (df['subject'] + ": " + df['body']).str.slice(0, 512)

    # 4. Set the Answer Key
    df['true_label'] = df['queue']
    candidate_labels = list(df['queue'].unique())

    # 5. Sample 50 rows for quick testing
    df_sample = df.sample(n=50, random_state=42).reset_index(drop=True)

    print(f"‚úÖ Data Ready. Processing {len(df_sample)} tickets.")
    print(f"üè∑Ô∏è Candidate Tags: {candidate_labels}")

except FileNotFoundError:
    print("‚ùå ERROR: File not found. Please upload your CSV.")

# ==========================================
# ü§ñ STEP 2: RUN ZERO-SHOT MODEL
# ==========================================
print("\nüöÄ Loading Zero-Shot Model (facebook/bart-large-mnli)...")
# This model is perfect for "NLI" (Natural Language Inference) classification
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=0)

print("‚è≥ Classifying Tickets...")

results_list = []
preds_for_accuracy = []

for index, row in df_sample.iterrows():
    text = row['text']
    
    # PREDICT: The model ranks ALL candidate labels for this text
    output = classifier(text, candidate_labels, multi_label=False)
    
    # Get Top 1 Prediction for Accuracy Calculation
    top_pred = output['labels'][0]
    preds_for_accuracy.append(top_pred)

    # Save Top 3 for your Report
    results_list.append({
        "Ticket_Text": text[:100] + "...",
        "True_Label": row['true_label'],
        "Pred_1": output['labels'][0], "Score_1": f"{output['scores'][0]:.2%}",
        "Pred_2": output['labels'][1], "Score_2": f"{output['scores'][1]:.2%}",
        "Pred_3": output['labels'][2], "Score_3": f"{output['scores'][2]:.2%}"
    })

# ==========================================
# üìä STEP 3: EVALUATE & SAVE
# ==========================================
# Calculate Accuracy (Does Top Prediction == True Label?)
acc = accuracy_score(df_sample['true_label'], preds_for_accuracy)
print(f"\nüèÜ Zero-Shot Accuracy: {acc:.2%}")

# Show a preview of the results
results_df = pd.DataFrame(results_list)
print("\nüîç Result Preview:")
print(results_df[['True_Label', 'Pred_1', 'Score_1']].head(5).to_string(index=False))

# Save to CSV
results_df.to_csv("task5_zeroshot_results.csv", index=False)
print("\nüíæ Saved results to 'task5_zeroshot_results.csv'")

‚è≥ Loading tickets-dataset.csv...
‚úÖ Data Ready. Processing 50 tickets.
üè∑Ô∏è Candidate Tags: ['Technical Support', 'Returns and Exchanges', 'Billing and Payments', 'Sales and Pre-Sales', 'Service Outages and Maintenance', 'Product Support', 'IT Support', 'Customer Service', 'Human Resources', 'General Inquiry']

üöÄ Loading Zero-Shot Model (facebook/bart-large-mnli)...


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


‚è≥ Classifying Tickets...

üèÜ Zero-Shot Accuracy: 4.00%

üîç Result Preview:
          True_Label                Pred_1 Score_1
          IT Support      Customer Service  81.51%
          IT Support       General Inquiry  22.96%
Billing and Payments      Customer Service  75.13%
   Technical Support       General Inquiry  20.15%
          IT Support Returns and Exchanges  16.69%

üíæ Saved results to 'task5_zeroshot_results.csv'


In [4]:
df = pd.read_csv("task5_zeroshot_results.csv")
df['Ticket_Text'][4]

': Could you offer assistance on securing medical data with Bitdefender Antivirus Plus on hospital sy...'