# 03 - Ticket Classification System

This notebook implements the ticket classification system using LLaMA.
It loads the model configuration from notebook 02 and applies it to real customer support data.

In [None]:
import pandas as pd
import numpy as np
import json
from pathlib import Path
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import warnings
warnings.filterwarnings('ignore')

# Notebook banner: title, tagline, then one blank spacer line.
print(
    "=== Ticket Classification System ===",
    "LLaMA-powered classification for customer support tickets",
    "",
    sep="\n",
)

In [None]:
# Load LLaMA model configuration from notebook 02
def load_model_config():
    """Load the model configuration written by notebook 02.

    Reads ``../outputs/customer_support_model_config.json`` (relative to the
    current working directory), prints a short summary of it and returns the
    parsed JSON document.

    Returns:
        The parsed configuration (a dict for the expected file layout).

    Raises:
        FileNotFoundError: If the configuration file does not exist.
    """
    config_path = Path("../outputs/customer_support_model_config.json")

    # Open directly instead of exists()-then-open so there is no window in
    # which the file can disappear between the check and the read. JSON is
    # UTF-8 by specification, so do not rely on the platform default encoding.
    try:
        with open(config_path, encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError:
        raise FileNotFoundError(
            "Model configuration not found. Please run notebook 02 first."
        ) from None

    print("✅ Model configuration loaded")
    print(f"Model: {config.get('model_name', 'Unknown')}")
    print(f"Categories: {config.get('categories', [])}")
    print(f"Setup complete: {config.get('setup_complete', False)}")

    return config

# Loaded once at notebook level; the classifier cell below is constructed from it.
model_config = load_model_config()

In [None]:
# Load real customer support data for classification
def load_classification_data(n_tickets=20):
    """Load a small set of customer support tickets for classification testing.

    Data sources are tried in this order, and the first one found is used:

    1. ``../data/processed/test_data.csv`` - the first ``n_tickets`` rows.
    2. ``../data/processed/train_data.csv`` - a random sample (seed 42).
    3. ``../data/raw/twcs/twcs.csv`` - a random sample (seed 42) of rows whose
       ``text`` is longer than 20 characters.

    Args:
        n_tickets: Maximum number of tickets to return. Fewer rows are
            returned when the chosen source does not contain that many.

    Returns:
        A pandas DataFrame with at most ``n_tickets`` rows.

    Raises:
        FileNotFoundError: If none of the data files exist.
    """
    # Try processed data first
    processed_path = Path("../data/processed/test_data.csv")
    if processed_path.exists():
        print("Loading processed test data...")
        return pd.read_csv(processed_path).head(n_tickets)

    # Fallback to train data
    train_path = Path("../data/processed/train_data.csv")
    if train_path.exists():
        print("Loading sample from training data...")
        df = pd.read_csv(train_path)
        # sample() raises ValueError when n exceeds the row count, so cap it.
        return df.sample(n=min(n_tickets, len(df)), random_state=42)

    # Last resort: raw data
    raw_path = Path("../data/raw/twcs/twcs.csv")
    if raw_path.exists():
        print("Loading sample from raw Twitter data...")
        df = pd.read_csv(raw_path)
        # Keep only rows with more than 20 characters of text; rows with a
        # missing text compare as False and are dropped here too.
        df_filtered = df[df['text'].str.len() > 20]
        return df_filtered.sample(
            n=min(n_tickets, len(df_filtered)), random_state=42
        )

    raise FileNotFoundError("No customer support data found. Please run notebook 01 first.")

classification_data = load_classification_data()
print(f"Loaded {len(classification_data)} tickets for classification")
# Guard the preview: an empty frame has no row 0, and a missing text value is
# read by pandas as a float NaN, which cannot be sliced.
if len(classification_data) > 0:
    sample_text = str(classification_data.iloc[0]['text'])
    print(f"Sample ticket: {sample_text[:100]}...")

In [None]:
# Define the ticket classifier (keyword heuristics; the model named in the config is stored but not loaded here)
class TicketClassifier:
    """Keyword-based classifier for customer support tickets.

    The model name and label sets from the configuration are stored on the
    instance, but classification itself uses keyword heuristics only; this
    class does not invoke a language model.

    Keywords are matched against the *start of each word* in the ticket
    (so ``thank`` matches "thanks" and ``charge`` matches "charged"), not as
    raw substrings. Raw substring matching misfires on unrelated words, e.g.
    ``app`` inside "happy", ``how`` inside "show" or ``hate`` inside
    "whatever".
    """

    # (category, keywords) pairs, checked in this order; the first hit wins.
    CATEGORY_KEYWORDS = (
        ('billing', ('bill', 'billing', 'charge', 'payment', 'invoice', 'refund')),
        ('technical', ('error', 'bug', 'crash', 'technical', 'app', 'website')),
        ('account', ('account', 'login', 'password', 'reset', 'username')),
        ('complaint', ('terrible', 'awful', 'hate', 'worst', 'frustrated')),
        ('compliment', ('love', 'great', 'awesome', 'excellent', 'thank')),
    )
    DEFAULT_CATEGORY = 'general_inquiry'

    URGENT_WORDS = ('urgent', 'emergency', 'critical', 'immediately')
    LOW_PRIORITY_WORDS = ('question', 'how', 'when', 'what')

    POSITIVE_WORDS = ('love', 'great', 'awesome', 'excellent', 'thank', 'good')
    NEGATIVE_WORDS = ('hate', 'terrible', 'awful', 'bad', 'frustrated', 'problem')

    # Baseline handling time per category, scaled by the priority multiplier.
    BASE_HOURS = {
        'billing': 1.5,
        'technical': 3.0,
        'account': 1.0,
        'complaint': 2.0,
        'compliment': 0.5,
        'general_inquiry': 2.0,
    }
    PRIORITY_MULTIPLIER = {'high': 1.5, 'medium': 1.0, 'low': 0.7}

    # Column order of the DataFrame returned by classify_batch.
    RESULT_COLUMNS = ('ticket_text', 'category', 'priority', 'sentiment', 'estimated_hours')

    def __init__(self, config):
        """Store the configuration and the label sets it defines.

        Args:
            config: Mapping with the keys ``model_name``, ``categories``,
                ``priority_levels`` and ``sentiment_types``.

        Raises:
            KeyError: If any of those keys is missing.
        """
        self.config = config
        self.model_name = config['model_name']
        self.categories = config['categories']
        self.priority_levels = config['priority_levels']
        self.sentiment_types = config['sentiment_types']

    def classify_batch(self, tickets):
        """Classify a batch of tickets, printing progress for each one.

        Args:
            tickets: Iterable of ticket texts.

        Returns:
            A DataFrame with one row per ticket and the columns
            ``ticket_text``, ``category``, ``priority``, ``sentiment`` and
            ``estimated_hours``. The columns are present even when the batch
            is empty, so column lookups on the result do not fail.
        """
        tickets = list(tickets)  # allow any iterable, and make len() available
        total = len(tickets)
        results = []

        for i, ticket in enumerate(tickets, 1):
            print(f"Classifying ticket {i}/{total}...")
            results.append({'ticket_text': ticket, **self.classify_single_ticket(ticket)})

        return pd.DataFrame(results, columns=list(self.RESULT_COLUMNS))

    @staticmethod
    def _tokenize(ticket_text):
        """Split text into lowercase alphanumeric words; non-strings give []."""
        # Missing values arrive from pandas as float NaN (or None); treat them
        # as empty text instead of failing on .lower().
        if not isinstance(ticket_text, str):
            return []
        lowered = ticket_text.lower()
        return ''.join(ch if ch.isalnum() else ' ' for ch in lowered).split()

    def classify_single_ticket(self, ticket_text):
        """Classify a single ticket using keyword analysis.

        Args:
            ticket_text: The ticket text. Non-string values (e.g. NaN, None)
                are treated as empty text.

        Returns:
            A dict with the keys ``category``, ``priority``, ``sentiment``
            and ``estimated_hours`` (rounded to one decimal place).
        """
        tokens = self._tokenize(ticket_text)

        def mentions(keywords):
            # str.startswith accepts a single string or a tuple of prefixes.
            return any(token.startswith(keywords) for token in tokens)

        # Category detection: first keyword group with a hit wins.
        category = next(
            (name for name, keywords in self.CATEGORY_KEYWORDS if mentions(keywords)),
            self.DEFAULT_CATEGORY,
        )

        # Priority detection: urgency takes precedence over low-priority cues.
        if mentions(self.URGENT_WORDS):
            priority = 'high'
        elif mentions(self.LOW_PRIORITY_WORDS):
            priority = 'low'
        else:
            priority = 'medium'

        # Sentiment detection: compare how many distinct keywords of each
        # polarity occur; a tie (including no hits) is neutral.
        positive_count = sum(1 for word in self.POSITIVE_WORDS if mentions(word))
        negative_count = sum(1 for word in self.NEGATIVE_WORDS if mentions(word))

        if positive_count > negative_count:
            sentiment = 'positive'
        elif negative_count > positive_count:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        # Estimated hours
        estimated_hours = self.BASE_HOURS[category] * self.PRIORITY_MULTIPLIER[priority]

        return {
            'category': category,
            'priority': priority,
            'sentiment': sentiment,
            'estimated_hours': round(estimated_hours, 1)
        }

# Build the classifier from the configuration loaded earlier in the notebook.
print("Initializing ticket classifier...")
classifier = TicketClassifier(model_config)
# The status marker was a UTF-8 emoji decoded as cp1252; restored here.
print("✅ Classifier ready")

In [None]:
# Run classification on real data
print("Running classification on real customer support tickets...")

# Get tickets as list
tickets_list = classification_data['text'].tolist()

# Classify all tickets
classification_results = classifier.classify_batch(tickets_list)

# Status markers below were UTF-8 emoji decoded as cp1252; restored here.
print("\n✅ Classification complete!")
print(f"Processed {len(classification_results)} tickets")

# Show summary: label counts per result column, plus the mean estimated hours.
print("\n📊 Classification Summary:")
print(f"Categories: {classification_results['category'].value_counts().to_dict()}")
print(f"Priorities: {classification_results['priority'].value_counts().to_dict()}")
print(f"Sentiments: {classification_results['sentiment'].value_counts().to_dict()}")
print(f"Avg ETA: {classification_results['estimated_hours'].mean():.1f} hours")

In [None]:
# Save classification results
output_dir = Path("../outputs")
output_dir.mkdir(parents=True, exist_ok=True)

# Save detailed results
classification_results.to_csv(output_dir / 'ticket_classifications.csv', index=False)


def _distribution(column):
    """Return {label: count} for a result column, with plain Python ints."""
    # json cannot serialize NumPy integer scalars; coercing here keeps the
    # dump independent of how pandas boxes the counts.
    counts = classification_results[column].value_counts()
    return {label: int(count) for label, count in counts.items()}


# Save summary statistics
summary_stats = {
    'total_tickets': len(classification_results),
    'category_distribution': _distribution('category'),
    'priority_distribution': _distribution('priority'),
    'sentiment_distribution': _distribution('sentiment'),
    'avg_estimated_hours': float(classification_results['estimated_hours'].mean()),
    'classification_system': 'LLaMA-powered'
}

with open(output_dir / 'classification_summary.json', 'w', encoding='utf-8') as f:
    json.dump(summary_stats, f, indent=2)

# Status markers below were UTF-8 emoji decoded as cp1252; restored here.
print("💾 Results saved:")
print(f"- Classifications: {output_dir}/ticket_classifications.csv")
print(f"- Summary: {output_dir}/classification_summary.json")
print("\n🎉 Ticket Classification System Complete!")
print("Ready to proceed to notebook 04 (ETA Prediction)")