In [1]:
import pandas as pd
from transformers import pipeline
import json
from typing import Dict, List, Tuple
import re

class FeedbackClassifier:
    def __init__(self):
        """Initialize the zero-shot classification pipeline"""
        # Load the zero-shot classification model
        self.classifier = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli"
        )
        
        # Define departments and their descriptions
        self.departments = {
            "Technical Support": [
                "technical issues", "software problems", "bugs", "system errors",
                "login problems", "app crashes", "website not working",
                "performance issues", "connectivity problems"
            ],
            "Billing": [
                "payment issues", "billing problems", "charges", "refunds",
                "invoice questions", "subscription issues", "pricing",
                "payment methods", "transaction problems"
            ],
            "Customer Service": [
                "general inquiries", "account questions", "service requests",
                "complaints", "feedback", "suggestions", "policy questions",
                "general support", "information requests"
            ],
            "Product": [
                "product features", "product requests", "feature suggestions",
                "product feedback", "functionality issues", "product improvements",
                "new product ideas", "product quality"
            ],
            "Shipping": [
                "delivery issues", "shipping problems", "order tracking",
                "damaged packages", "lost packages", "shipping delays",
                "delivery address", "shipping costs"
            ],
            "Returns": [
                "return requests", "exchange requests", "return policy",
                "defective products", "wrong items", "return process",
                "refund status", "return shipping"
            ],
            "Sales": [
                "sales inquiries", "product information", "pricing questions",
                "purchase assistance", "product recommendations",
                "sales support", "quotations", "bulk orders"
            ]
        }
    
    def preprocess_text(self, text: str) -> str:
        """Clean and preprocess the feedback text"""
        # Remove extra whitespace and convert to lowercase
        text = re.sub(r'\s+', ' ', text.strip().lower())
        
        # Remove special characters but keep basic punctuation
        text = re.sub(r'[^\w\s.,!?-]', '', text)
        
        return text
    
    def classify_feedback(self, feedback: str, confidence_threshold: float = 0.5) -> Dict:
        """
        Classify feedback into departments using zero-shot classification
        
        Args:
            feedback: Customer feedback text
            confidence_threshold: Minimum confidence score to accept classification
            
        Returns:
            Dictionary with department, confidence score, and all scores
        """
        # Preprocess the feedback
        processed_feedback = self.preprocess_text(feedback)
        
        # Get department names for classification
        department_names = list(self.departments.keys())
        
        # Perform zero-shot classification
        result = self.classifier(processed_feedback, department_names)
        
        # Extract results
        top_department = result['labels'][0]
        top_confidence = result['scores'][0]
        
        # Create detailed results
        classification_result = {
            'feedback': feedback,
            'predicted_department': top_department,
            'confidence': top_confidence,
            'all_scores': dict(zip(result['labels'], result['scores'])),
            'high_confidence': top_confidence >= confidence_threshold
        }
        
        return classification_result
    
    def classify_batch(self, feedbacks: List[str], confidence_threshold: float = 0.5) -> List[Dict]:
        """Classify multiple feedbacks at once"""
        results = []
        for feedback in feedbacks:
            result = self.classify_feedback(feedback, confidence_threshold)
            results.append(result)
        return results
    
    def get_department_summary(self, classifications: List[Dict]) -> Dict:
        """Generate summary statistics for classified feedbacks"""
        department_counts = {}
        high_confidence_counts = {}
        
        for classification in classifications:
            dept = classification['predicted_department']
            department_counts[dept] = department_counts.get(dept, 0) + 1
            
            if classification['high_confidence']:
                high_confidence_counts[dept] = high_confidence_counts.get(dept, 0) + 1
        
        summary = {
            'total_feedbacks': len(classifications),
            'department_distribution': department_counts,
            'high_confidence_distribution': high_confidence_counts,
            'average_confidence': sum(c['confidence'] for c in classifications) / len(classifications)
        }
        
        return summary
    
    def save_results(self, classifications: List[Dict], filename: str = "feedback_classifications.json"):
        """Save classification results to JSON file"""
        with open(filename, 'w') as f:
            json.dump(classifications, f, indent=2)
        print(f"Results saved to {filename}")
    
    def route_feedback(self, feedback: str) -> Dict:
        """
        Route feedback to appropriate department with routing information
        """
        classification = self.classify_feedback(feedback)
        
        # Add routing information
        routing_info = {
            **classification,
            'routing_decision': 'auto_route' if classification['high_confidence'] else 'manual_review',
            'department_keywords': self.departments[classification['predicted_department']],
            'timestamp': pd.Timestamp.now().isoformat()
        }
        
        return routing_info

# Example usage and testing
def demo_classifier():
    """Demonstrate the feedback classifier with sample data"""
    
    # Initialize classifier
    classifier = FeedbackClassifier()
    
    # Sample customer feedbacks
    sample_feedbacks = [
        "I can't log into my account, the app keeps crashing",
        "I was charged twice for the same order last month",
        "The product quality is amazing, but I'd love to see more color options",
        "My package was supposed to arrive yesterday but it's still not here",
        "I want to return this item because it doesn't fit properly",
        "Can you help me understand your pricing plans?",
        "Your customer service team was very helpful with my issue",
        "The website is running very slowly and sometimes doesn't load",
        "I need a refund for my cancelled subscription",
        "How do I track my order? I can't find the tracking number"
    ]
    
    print("=== Customer Feedback Classification Demo ===\n")
    
    # Classify individual feedbacks
    print("Individual Classifications:")
    print("-" * 50)
    
    results = []
    for i, feedback in enumerate(sample_feedbacks, 1):
        result = classifier.classify_feedback(feedback)
        results.append(result)
        
        print(f"{i}. Feedback: {feedback}")
        print(f"   Department: {result['predicted_department']}")
        print(f"   Confidence: {result['confidence']:.3f}")
        print(f"   High Confidence: {result['high_confidence']}")
        print()
    
    # Generate summary
    print("=== Department Summary ===")
    summary = classifier.get_department_summary(results)
    print(f"Total Feedbacks: {summary['total_feedbacks']}")
    print(f"Average Confidence: {summary['average_confidence']:.3f}")
    print("\nDepartment Distribution:")
    for dept, count in summary['department_distribution'].items():
        print(f"  {dept}: {count}")
    
    # Save results
    classifier.save_results(results)
    
    return classifier, results

# Run the demo
if __name__ == "__main__":
    classifier, results = demo_classifier()
    
    # Example of routing a new feedback
    print("\n=== Routing Example ===")
    new_feedback = "I need help setting up my new account"
    routing_result = classifier.route_feedback(new_feedback)
    print(f"Feedback: {new_feedback}")
    print(f"Routed to: {routing_result['predicted_department']}")
    print(f"Routing Decision: {routing_result['routing_decision']}")
    print(f"Confidence: {routing_result['confidence']:.3f}")

2025-07-13 12:07:09.795873: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752408430.053836      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752408430.138685      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


=== Customer Feedback Classification Demo ===

Individual Classifications:
--------------------------------------------------
1. Feedback: I can't log into my account, the app keeps crashing
   Department: Returns
   Confidence: 0.407
   High Confidence: False

2. Feedback: I was charged twice for the same order last month
   Department: Returns
   Confidence: 0.228
   High Confidence: False

3. Feedback: The product quality is amazing, but I'd love to see more color options
   Department: Product
   Confidence: 0.663
   High Confidence: True

4. Feedback: My package was supposed to arrive yesterday but it's still not here
   Department: Shipping
   Confidence: 0.490
   High Confidence: False

5. Feedback: I want to return this item because it doesn't fit properly
   Department: Returns
   Confidence: 0.763
   High Confidence: True

6. Feedback: Can you help me understand your pricing plans?
   Department: Customer Service
   Confidence: 0.258
   High Confidence: False

7. Feedback: Yo