In [1]:
# ==================================================
# CELL 1: Import All Required Libraries
# ==================================================

import os
import pickle
import re
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, hstack as sparse_hstack

warnings.filterwarnings('ignore')

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 200)

print("="*70)
print("üöÄ NOTEBOOK 5: DISPUTE RESOLUTION PREDICTION SYSTEM")
print("="*70)
print("\n‚úÖ All libraries imported successfully!")
print("\nüì¶ This notebook will:")
print("   1. Load your trained model")
print("   2. Create prediction functions")
print("   3. Test with sample complaints")
print("   4. Generate prediction reports")
print("="*70)

üöÄ NOTEBOOK 5: DISPUTE RESOLUTION PREDICTION SYSTEM

‚úÖ All libraries imported successfully!

üì¶ This notebook will:
   1. Load your trained model
   2. Create prediction functions
   3. Test with sample complaints
   4. Generate prediction reports


In [2]:
# ==================================================
# CELL 2: Load Saved Model and Components
# ==================================================

print("üìÇ LOADING SAVED MODEL COMPONENTS")
print("="*70)

models_dir = '../models/'

# Load all components
print("\nüîÑ Loading components...")

try:
    # Load the five artifacts written by the training notebook. Each one is
    # bound to a module-level name that later cells read directly.
    #
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load artifacts that you produced yourself; never point
    # models_dir at files from an untrusted source.

    # 1. Load model
    print("\n1Ô∏è‚É£ Loading trained model...")
    with open(f'{models_dir}dispute_resolution_model_latest.pkl', 'rb') as f:
        model = pickle.load(f)
    print(f"   ‚úÖ Model loaded: {type(model).__name__}")

    # 2. Load TF-IDF vectorizer
    print("\n2Ô∏è‚É£ Loading TF-IDF vectorizer...")
    with open(f'{models_dir}tfidf_vectorizer_latest.pkl', 'rb') as f:
        tfidf_vectorizer = pickle.load(f)
    print(f"   ‚úÖ Vectorizer loaded: {tfidf_vectorizer.max_features} features")

    # 3. Load target encoder
    print("\n3Ô∏è‚É£ Loading target encoder...")
    with open(f'{models_dir}target_encoder_latest.pkl', 'rb') as f:
        target_encoder = pickle.load(f)
    print(f"   ‚úÖ Target encoder loaded: {len(target_encoder.classes_)} classes")
    print(f"   Classes: {list(target_encoder.classes_)}")

    # 4. Load feature info
    print("\n4Ô∏è‚É£ Loading feature info...")
    with open(f'{models_dir}feature_info_latest.pkl', 'rb') as f:
        feature_info = pickle.load(f)
    print(f"   ‚úÖ Feature info loaded")
    print(f"   Numerical features: {feature_info['numerical_features']}")

    # 5. Load metadata
    print("\n5Ô∏è‚É£ Loading model metadata...")
    with open(f'{models_dir}model_metadata_latest.pkl', 'rb') as f:
        metadata = pickle.load(f)
    print(f"   ‚úÖ Metadata loaded")
    print(f"   Model: {metadata['model_name']}")
    print(f"   Accuracy: {metadata['accuracy']:.4f} ({metadata['accuracy']*100:.2f}%)")
    print(f"   F1-Score: {metadata['f1_score']:.4f}")

    print("\n" + "="*70)
    print("‚úÖ ALL COMPONENTS LOADED SUCCESSFULLY!")
    print("="*70)

except FileNotFoundError as e:
    print(f"\n‚ùå ERROR: Could not find model files!")
    print(f"   {e}")
    print("\n   Make sure you completed Notebook 4 and saved the model.")

except Exception as e:
    print(f"\n‚ùå ERROR loading components: {e}")

üìÇ LOADING SAVED MODEL COMPONENTS

üîÑ Loading components...

1Ô∏è‚É£ Loading trained model...
   ‚úÖ Model loaded: LinearSVC

2Ô∏è‚É£ Loading TF-IDF vectorizer...
   ‚úÖ Vectorizer loaded: 3000 features

3Ô∏è‚É£ Loading target encoder...
   ‚úÖ Target encoder loaded: 3 classes
   Classes: ['favor_seller', 'favour_customer', 'split_payment']

4Ô∏è‚É£ Loading feature info...
   ‚úÖ Feature info loaded
   Numerical features: ['processed_word_count', 'negative_word_count', 'positive_word_count', 'urgency_indicator', 'financial_terms_count', 'question_count', 'exclamation_count']

5Ô∏è‚É£ Loading model metadata...
   ‚úÖ Metadata loaded
   Model: Linear SVC
   Accuracy: 0.8192 (81.92%)
   F1-Score: 0.7663

‚úÖ ALL COMPONENTS LOADED SUCCESSFULLY!


In [3]:
# ==================================================
# CELL 3: Define Text Preprocessing Functions
# ==================================================

print("üßπ DEFINING TEXT PREPROCESSING FUNCTIONS")
print("="*70)

def clean_text(text):
    """
    Normalise a raw complaint into lowercase, noise-free text.

    The original notebook documents this as the SAME cleaning used during
    training (Notebook 3), so the substitutions and their order must not be
    altered without retraining.

    Args:
        text: Raw complaint. Missing values (None/NaN) and the empty string
            return ''. Any other value is converted with str().

    Returns:
        str: Lowercased text with redactions, URLs, e-mail addresses, phone
        numbers, account numbers and dates removed, currency amounts replaced
        by the literal token 'AMOUNT', whitespace collapsed, and every
        character outside letters, digits, whitespace and . , ! ? dropped.
    """
    if pd.isna(text) or text == '':
        return ''

    # Applied strictly in this order; later patterns see the output of
    # earlier ones (e.g. dates are removed before currency is replaced).
    substitutions = (
        (r'x{2,}', ''),                                    # XXXX redaction markers
        (r'http\S+|www\.\S+', ''),                         # URLs
        (r'\S+@\S+', ''),                                  # e-mail addresses
        (r'\d{3}[-.\s]?\d{3}[-.\s]?\d{4}', ''),            # phone: 555-123-4567
        (r'\(\d{3}\)\s*\d{3}[-.\s]?\d{4}', ''),            # phone: (555) 123-4567
        (r'(account|acct|acc)[\s#]*\d+', ''),              # account numbers
        (r'\d{1,2}[-/]\d{1,2}[-/]\d{2,4}', ''),            # dates
        (r'\$\s?\d+[\d,]*\.?\d*', 'AMOUNT'),               # currency -> placeholder
        (r'\s+', ' '),                                     # collapse whitespace
        (r'[^a-zA-Z0-9\s.,!?]', ''),                       # strip other symbols
    )

    cleaned = str(text).lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()

def remove_stopwords(text):
    """
    Drop common English stopwords from whitespace-separated text.

    The original notebook documents this as the SAME function used during
    training (Notebook 3); the word list must stay in sync with it.

    Args:
        text (str): Text to filter (normally the output of clean_text).

    Returns:
        str: The surviving tokens joined by single spaces. Matching is
        case-insensitive, but kept tokens retain their original casing.
    """
    stopwords = frozenset((
        # pronouns
        'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
        'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself',
        'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them',
        'their', 'theirs', 'themselves',
        # determiners / interrogatives
        'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those',
        # auxiliary verbs
        'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing',
        # articles / conjunctions
        'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
        'while',
        # prepositions / adverbs
        'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
        'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to',
        'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again',
        'further', 'then', 'once',
    ))

    kept_tokens = []
    for token in text.split():
        if token.lower() in stopwords:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)

def extract_text_features(text):
    """
    Compute the seven hand-crafted numerical features for a piece of text.

    The original notebook documents these as the SAME features used during
    training (Notebook 3). Word matching is exact and case-sensitive on
    whitespace-separated tokens, so a token with attached punctuation
    (e.g. 'terrible.') does not count.

    Args:
        text (str): Pre-processed complaint text.

    Returns:
        dict: Keys 'processed_word_count', 'negative_word_count',
        'positive_word_count', 'urgency_indicator' (0 or 1),
        'financial_terms_count', 'question_count', 'exclamation_count'.
    """
    negative_words = ['terrible', 'horrible', 'worst', 'awful', 'bad', 'poor',
                      'unacceptable', 'frustrated', 'angry', 'disappointed', 'never']
    positive_words = ['good', 'great', 'excellent', 'satisfied', 'happy',
                      'resolved', 'helpful', 'thank']
    urgency_words = ['urgent', 'immediately', 'asap', 'emergency', 'critical',
                     'important', 'serious', 'severe']
    financial_terms = ['payment', 'amount', 'fee', 'charge', 'refund', 'money',
                       'balance', 'account', 'credit', 'debit', 'transaction']

    tokens = text.split()

    def occurrences(vocabulary):
        # Counts every matching token, so repeated words are counted repeatedly.
        return len([token for token in tokens if token in vocabulary])

    has_urgency = False
    for candidate in urgency_words:
        if candidate in tokens:
            has_urgency = True
            break

    features = {}
    features['processed_word_count'] = len(tokens)
    features['negative_word_count'] = occurrences(negative_words)
    features['positive_word_count'] = occurrences(positive_words)
    features['urgency_indicator'] = 1 if has_urgency else 0
    features['financial_terms_count'] = occurrences(financial_terms)
    # Punctuation counts are taken over the raw string, not the tokens.
    features['question_count'] = text.count('?')
    features['exclamation_count'] = text.count('!')
    return features

print("‚úÖ Text preprocessing functions defined:")
print("   ‚Ä¢ clean_text() - Removes noise and special characters")
print("   ‚Ä¢ remove_stopwords() - Removes common words")
print("   ‚Ä¢ extract_text_features() - Extracts numerical features")
print("\n‚ö° These are the SAME functions used during training")
print("="*70)

üßπ DEFINING TEXT PREPROCESSING FUNCTIONS
‚úÖ Text preprocessing functions defined:
   ‚Ä¢ clean_text() - Removes noise and special characters
   ‚Ä¢ remove_stopwords() - Removes common words
   ‚Ä¢ extract_text_features() - Extracts numerical features

‚ö° These are the SAME functions used during training


In [4]:
# ==================================================
# CELL 4: Create Main Prediction Function
# ==================================================

print("üéØ CREATING MAIN PREDICTION FUNCTION")
print("="*70)

def predict_dispute_resolution(complaint_text):
    """
    Predict the dispute resolution outcome for a single complaint.

    Pipeline: clean_text -> remove_stopwords -> extract_text_features ->
    TF-IDF transform -> append the seven numerical features -> model.predict.

    Reads the module-level objects ``model``, ``tfidf_vectorizer`` and
    ``target_encoder`` loaded earlier in the notebook.

    Args:
        complaint_text: Raw complaint text from the customer. Missing values
            (None/NaN) and non-string values are tolerated.

    Returns:
        dict: On success, the keys
            'prediction'        - decoded class label,
            'confidence_scores' - {label: score}; a softmax over the model's
                                  decision_function margins when available,
                                  otherwise {prediction: 1.0}. These are NOT
                                  calibrated probabilities, only a relative
                                  ranking of the classes,
            'text_features'     - dict from extract_text_features,
            'cleaned_text'      - first 200 chars after clean_text,
            'processed_text'    - first 200 chars after stopword removal.
        If fewer than 5 characters survive preprocessing, returns
        {'error': ..., 'original_text': <first 100 chars of the input>}.
    """

    # Steps 1-2: same preprocessing chain the notebook applies everywhere.
    cleaned_text = clean_text(complaint_text)
    processed_text = remove_stopwords(cleaned_text)

    # Reject inputs that are empty or too short after preprocessing.
    if not processed_text or len(processed_text) < 5:
        # The input can be None/NaN or a non-string here; slicing it directly
        # would raise TypeError, so build the preview defensively.
        original_preview = '' if pd.isna(complaint_text) else str(complaint_text)[:100]
        return {
            'error': 'Text too short or empty after preprocessing',
            'original_text': original_preview
        }

    # Step 3: hand-crafted numerical features.
    text_features = extract_text_features(processed_text)

    # Step 4: TF-IDF representation of the processed text.
    text_tfidf = tfidf_vectorizer.transform([processed_text])

    # Step 5: numerical features in a fixed column order. This is the order
    # printed for feature_info['numerical_features'] when the components are
    # loaded; it must match the order used when the model was trained.
    feature_order = (
        'processed_word_count',
        'negative_word_count',
        'positive_word_count',
        'urgency_indicator',
        'financial_terms_count',
        'question_count',
        'exclamation_count',
    )
    numerical_features_values = np.array(
        [[text_features[name] for name in feature_order]]
    )

    # Step 6: append the numerical columns to the TF-IDF block, staying sparse.
    numerical_sparse = csr_matrix(numerical_features_values)
    X_combined = sparse_hstack([text_tfidf, numerical_sparse])

    # Step 7: predict and decode the label.
    prediction_encoded = model.predict(X_combined)[0]
    prediction_label = target_encoder.inverse_transform([prediction_encoded])[0]

    # Step 8: per-class scores. Fall back to "100% on the predicted class"
    # when the model exposes no decision_function or when the score vector
    # does not have one entry per class (e.g. a binary model returns a scalar).
    confidence_dict = {prediction_label: 1.0}
    if hasattr(model, 'decision_function'):
        decision_scores = np.atleast_1d(model.decision_function(X_combined)[0])
        if len(decision_scores) == len(target_encoder.classes_):
            # Softmax with max-subtraction for numerical stability.
            exp_scores = np.exp(decision_scores - np.max(decision_scores))
            confidence_scores = exp_scores / exp_scores.sum()
            confidence_dict = {
                label: float(score)
                for label, score in zip(target_encoder.classes_, confidence_scores)
            }

    # Step 9: assemble the result.
    return {
        'prediction': prediction_label,
        'confidence_scores': confidence_dict,
        'text_features': text_features,
        'cleaned_text': cleaned_text[:200],
        'processed_text': processed_text[:200]
    }

print("‚úÖ Main prediction function created: predict_dispute_resolution()")
print("\nüìù Usage:")
print("   result = predict_dispute_resolution('Your complaint text here')")
print("="*70)

üéØ CREATING MAIN PREDICTION FUNCTION
‚úÖ Main prediction function created: predict_dispute_resolution()

üìù Usage:
   result = predict_dispute_resolution('Your complaint text here')


In [5]:
# ==================================================
# CELL 5: Create Pretty Print Function for Results
# ==================================================

def print_prediction_result(result):
    """
    Print a human-readable report for one prediction result.

    Args:
        result (dict): Either an error dict containing the key 'error', or a
            success dict with the keys 'prediction', 'confidence_scores'
            and 'text_features'.

    Returns:
        None. Everything is written to stdout. For an error dict only a
        single error line is printed.
    """
    if 'error' in result:
        print("‚ùå ERROR:", result['error'])
        return

    rule = "="*70
    print(rule)
    print(" üéØ DISPUTE RESOLUTION PREDICTION")
    print(rule)

    # Headline outcome
    prediction = result['prediction']
    print(f"\nüèÜ PREDICTED OUTCOME: {prediction.upper()}")

    # One 40-cell bar per class, highest score first
    print(f"\nüìä CONFIDENCE SCORES:")
    ranked = sorted(result['confidence_scores'].items(),
                    key=lambda pair: pair[1], reverse=True)
    for label, score in ranked:
        filled = int(score * 40)
        bar = '‚ñà' * filled + '‚ñë' * (40 - filled)
        print(f"   {label:20s} {bar} {score*100:.2f}%")

    # Numerical text features, in a fixed display order
    print(f"\nüìà TEXT FEATURES:")
    features = result['text_features']
    display_rows = (
        ('Word count', 'processed_word_count'),
        ('Negative words', 'negative_word_count'),
        ('Positive words', 'positive_word_count'),
        ('Financial terms', 'financial_terms_count'),
        ('Urgency', 'urgency_indicator'),
        ('Questions', 'question_count'),
        ('Exclamations', 'exclamation_count'),
    )
    for caption, key in display_rows:
        value = features[key]
        if key == 'urgency_indicator':
            # The 0/1 flag is shown as Yes/No
            value = 'Yes' if value else 'No'
        print(f"   ‚Ä¢ {caption}: {value}")

    # Plain-language reading of the outcome; any label other than the two
    # named ones is treated as the split/compromise case.
    print(f"\nüí° INTERPRETATION:")
    interpretations = {
        'favour_customer': ("   ‚Üí Customer should WIN this dispute",
                            "   ‚Üí Recommendation: Provide refund or relief to customer"),
        'favor_seller': ("   ‚Üí Seller/Company should WIN this dispute",
                         "   ‚Üí Recommendation: Uphold company position"),
    }
    compromise = ("   ‚Üí COMPROMISE recommended",
                  "   ‚Üí Recommendation: Partial refund or non-monetary relief")
    for line in interpretations.get(prediction, compromise):
        print(line)

    print("\n" + rule)

print("‚úÖ Pretty print function created: print_prediction_result()")
print("="*70)

‚úÖ Pretty print function created: print_prediction_result()


In [6]:
# ==================================================
# CELL 6: Test Prediction - Sample 1
# ==================================================

print("üß™ TEST 1: Customer Should Win")
print("="*70)

sample_complaint_1 = """
I opened a credit card account with this company 3 years ago. I always paid 
my bills on time and never missed a payment. Last month, they charged me a 
$50 late fee even though I paid 2 days before the due date. I have proof of 
payment from my bank statement. When I called customer service, they were 
rude and refused to remove the fee. This is completely unacceptable and unfair. 
I want my $50 refunded immediately. I have been a loyal customer and this 
treatment is terrible. I will never use this company again.
"""

print("\nüìù COMPLAINT TEXT:")
print(sample_complaint_1.strip())

print("\nüîÑ Making prediction...")
result = predict_dispute_resolution(sample_complaint_1)

print_prediction_result(result)

üß™ TEST 1: Customer Should Win

üìù COMPLAINT TEXT:
I opened a credit card account with this company 3 years ago. I always paid 
my bills on time and never missed a payment. Last month, they charged me a 
$50 late fee even though I paid 2 days before the due date. I have proof of 
payment from my bank statement. When I called customer service, they were 
rude and refused to remove the fee. This is completely unacceptable and unfair. 
I want my $50 refunded immediately. I have been a loyal customer and this 
treatment is terrible. I will never use this company again.

üîÑ Making prediction...
 üéØ DISPUTE RESOLUTION PREDICTION

üèÜ PREDICTED OUTCOME: FAVOUR_CUSTOMER

üìä CONFIDENCE SCORES:
   favour_customer      ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 39.58%
   favor_seller         ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñ

In [7]:
# ==================================================
# CELL 7: Test Prediction - Sample 2
# ==================================================

print("üß™ TEST 2: Seller Should Win")
print("="*70)

sample_complaint_2 = """
I applied for a loan and was denied. The company said I don't meet their 
credit requirements. I think this is wrong but when I look at my credit 
report, I see that I have several late payments and my credit score is 
below 600. The company clearly explained their requirements on their 
website before I applied. I understand they have lending standards.
"""

print("\nüìù COMPLAINT TEXT:")
print(sample_complaint_2.strip())

print("\nüîÑ Making prediction...")
result = predict_dispute_resolution(sample_complaint_2)

print_prediction_result(result)

üß™ TEST 2: Seller Should Win

üìù COMPLAINT TEXT:
I applied for a loan and was denied. The company said I don't meet their 
credit requirements. I think this is wrong but when I look at my credit 
report, I see that I have several late payments and my credit score is 
below 600. The company clearly explained their requirements on their 
website before I applied. I understand they have lending standards.

üîÑ Making prediction...
 üéØ DISPUTE RESOLUTION PREDICTION

üèÜ PREDICTED OUTCOME: FAVOR_SELLER

üìä CONFIDENCE SCORES:
   favor_seller         ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 66.09%
   favour_customer      ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 19.43%
   split_payment        ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 14.48%

üìà TE

In [8]:
# ==================================================
# CELL 8: Test Prediction - Sample 3
# ==================================================

print("üß™ TEST 3: Compromise/Split Payment")
print("="*70)

sample_complaint_3 = """
I purchased a product that arrived damaged. The company offered to replace 
it but I would have to pay for return shipping which costs $30. I think 
they should cover the return shipping since the product was damaged when 
it arrived. However, I understand they have a return policy. Maybe we can 
split the cost?
"""

print("\nüìù COMPLAINT TEXT:")
print(sample_complaint_3.strip())

print("\nüîÑ Making prediction...")
result = predict_dispute_resolution(sample_complaint_3)

print_prediction_result(result)

üß™ TEST 3: Compromise/Split Payment

üìù COMPLAINT TEXT:
I purchased a product that arrived damaged. The company offered to replace 
it but I would have to pay for return shipping which costs $30. I think 
they should cover the return shipping since the product was damaged when 
it arrived. However, I understand they have a return policy. Maybe we can 
split the cost?

üîÑ Making prediction...
 üéØ DISPUTE RESOLUTION PREDICTION

üèÜ PREDICTED OUTCOME: FAVOR_SELLER

üìä CONFIDENCE SCORES:
   favor_seller         ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 59.71%
   favour_customer      ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 27.70%
   split_payment        ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 12.60%

üìà TEXT FEATURES:
   ‚Ä¢ Word count: 31
 

In [9]:
# ==================================================
# CELL 9: Create Batch Prediction Function
# ==================================================

print("üì¶ CREATING BATCH PREDICTION FUNCTION")
print("="*70)

def predict_batch_complaints(complaints_list):
    """
    Predict outcomes for multiple complaints at once.

    Args:
        complaints_list: One of
            - a list/tuple (or other iterable) of complaint texts,
            - a single string (treated as a batch of one),
            - a pandas Series of complaint texts,
            - a DataFrame; the 'complaint_text' column is used if present,
              otherwise the first column.

    Returns:
        DataFrame with one row per complaint. Successful rows carry
        'complaint_id' (the input index label), 'complaint_text' (first 100
        chars + '...'), 'prediction', 'confidence' (highest class score),
        'word_count', 'negative_words' and 'positive_words'. Rows that could
        not be scored carry 'prediction' == 'ERROR', 'confidence' == 0 and an
        'error' message. An empty input yields an empty frame that still has
        the success-row columns.
    """

    # Normalise every supported input shape to a DataFrame.
    if isinstance(complaints_list, pd.DataFrame):
        df = complaints_list.copy()
    elif isinstance(complaints_list, pd.Series):
        df = complaints_list.to_frame(name='complaint_text')
    elif isinstance(complaints_list, str):
        # A bare string would otherwise be iterated character by character.
        df = pd.DataFrame({'complaint_text': [complaints_list]})
    else:
        df = pd.DataFrame({'complaint_text': list(complaints_list)})

    total = len(df)
    print(f"üîÑ Processing {total} complaints...")

    has_text_column = 'complaint_text' in df.columns
    results = []

    # `position` counts processed rows; `idx` is the index label, which may be
    # non-integer or non-sequential and so cannot drive the progress counter.
    for position, (idx, row) in enumerate(df.iterrows(), start=1):
        complaint_text = row['complaint_text'] if has_text_column else row.iloc[0]

        # Make prediction
        result = predict_dispute_resolution(complaint_text)

        if 'error' in result:
            results.append({
                'complaint_id': idx,
                'prediction': 'ERROR',
                'confidence': 0,
                'error': result['error']
            })
        else:
            max_confidence = max(result['confidence_scores'].values())
            results.append({
                'complaint_id': idx,
                # str() keeps the preview working for non-string cell values.
                'complaint_text': str(complaint_text)[:100] + '...',
                'prediction': result['prediction'],
                'confidence': max_confidence,
                'word_count': result['text_features']['processed_word_count'],
                'negative_words': result['text_features']['negative_word_count'],
                'positive_words': result['text_features']['positive_word_count']
            })

        # Progress indicator
        if position % 10 == 0:
            print(f"   Processed {position}/{total} complaints...")

    if results:
        results_df = pd.DataFrame(results)
    else:
        # Keep the expected columns so callers can still select
        # results_df['prediction'] / ['confidence'] on an empty batch.
        results_df = pd.DataFrame(columns=[
            'complaint_id', 'complaint_text', 'prediction', 'confidence',
            'word_count', 'negative_words', 'positive_words'
        ])
    print(f"\n‚úÖ Batch prediction complete!")

    return results_df

print("‚úÖ Batch prediction function created: predict_batch_complaints()")
print("\nüìù Usage:")
print("   complaints = ['complaint 1', 'complaint 2', 'complaint 3']")
print("   results_df = predict_batch_complaints(complaints)")
print("="*70)

üì¶ CREATING BATCH PREDICTION FUNCTION
‚úÖ Batch prediction function created: predict_batch_complaints()

üìù Usage:
   complaints = ['complaint 1', 'complaint 2', 'complaint 3']
   results_df = predict_batch_complaints(complaints)


In [10]:
# ==================================================
# CELL 10: Test Batch Prediction
# ==================================================

print("üß™ TESTING BATCH PREDICTION")
print("="*70)

# Create sample batch of complaints
test_complaints = [
    "Company charged me fees I did not authorize. This is fraud! I want full refund.",
    "Applied for credit card but was denied due to low credit score. I understand.",
    "Product was damaged. Company will replace but I need to pay shipping. Not fair.",
    "Been waiting 3 months for refund. Called customer service 10 times. Very frustrated!",
    "Service was okay but could be better. No major issues though."
]

print(f"\nüìù Processing {len(test_complaints)} sample complaints...\n")

# Run batch prediction
results_df = predict_batch_complaints(test_complaints)

print("\nüìä BATCH PREDICTION RESULTS:")
print("="*70)
print(results_df.to_string(index=False))

print("\nüìà SUMMARY STATISTICS:")
print("-"*70)
print(results_df['prediction'].value_counts())

print(f"\nAverage confidence: {results_df['confidence'].mean():.2%}")

print("\n" + "="*70)

üß™ TESTING BATCH PREDICTION

üìù Processing 5 sample complaints...

üîÑ Processing 5 complaints...

‚úÖ Batch prediction complete!

üìä BATCH PREDICTION RESULTS:
 complaint_id                                                                          complaint_text   prediction  confidence  word_count  negative_words  positive_words
            0      Company charged me fees I did not authorize. This is fraud! I want full refund.... favor_seller    0.508967           9               0               0
            1        Applied for credit card but was denied due to low credit score. I understand.... favor_seller    0.662264           9               0               0
            2      Product was damaged. Company will replace but I need to pay shipping. Not fair.... favor_seller    0.623196          10               0               0
            3 Been waiting 3 months for refund. Called customer service 10 times. Very frustrated!... favor_seller    0.547913          11           

In [11]:
# ==================================================
# CELL 11: Create Function to Save Predictions
# ==================================================

print("üíæ CREATING PREDICTION EXPORT FUNCTION")
print("="*70)

def save_predictions(results_df, filename=None, output_dir='../data/processed'):
    """
    Save prediction results to a CSV file.

    Args:
        results_df (DataFrame): Predictions to write (written without the index).
        filename (str, optional): Target file name. Defaults to
            'predictions_<YYYYmmdd_HHMMSS>.csv' using the current local time.
        output_dir (str): Directory to write into; created if it does not
            exist. Defaults to the notebook's '../data/processed' folder.

    Returns:
        str: Path of the written file ('<output_dir>/<filename>').
    """

    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f'predictions_{timestamp}.csv'

    if output_dir in ('', '.'):
        output_path = filename
    else:
        output_path = f"{output_dir.rstrip('/')}/{filename}"

    # A fresh checkout may not have the output folder yet.
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

    results_df.to_csv(output_path, index=False)

    # Report the real on-disk size; re-reading the CSV and measuring the
    # DataFrame's memory usage would report something unrelated to the file.
    file_size_kb = os.path.getsize(output_path) / 1024

    print(f"‚úÖ Predictions saved to: {output_path}")
    print(f"   ‚Ä¢ Records: {len(results_df)}")
    print(f"   ‚Ä¢ File size: {file_size_kb:.2f} KB")

    return output_path

print("‚úÖ Export function created: save_predictions()")
print("\nüìù Usage:")
print("   save_predictions(results_df)")
print("   save_predictions(results_df, 'my_predictions.csv')")
print("="*70)

üíæ CREATING PREDICTION EXPORT FUNCTION
‚úÖ Export function created: save_predictions()

üìù Usage:
   save_predictions(results_df)
   save_predictions(results_df, 'my_predictions.csv')


In [12]:
# ==================================================
# CELL 12: Interactive Prediction Interface
# ==================================================

print("üéÆ INTERACTIVE PREDICTION INTERFACE")
print("="*70)

def interactive_predict():
    """
    Read complaints from the keyboard in a loop and print a prediction for each.

    The loop ends when the user types 'quit' (any casing, surrounding
    whitespace ignored) or when input is closed/interrupted (EOF or Ctrl-C).
    Inputs shorter than 10 characters after trimming are rejected with a
    warning and the prompt is shown again.

    Returns:
        None. All interaction happens through input() and stdout.
    """

    print("\n" + "="*70)
    print(" üéØ ESCROW DISPUTE RESOLUTION PREDICTOR")
    print("="*70)
    print("\nEnter a complaint text to get a prediction.")
    print("Type 'quit' to exit.\n")

    while True:
        print("-"*70)
        try:
            # Trim first so 'quit ' exits and whitespace-only input is
            # not mistaken for a long enough complaint.
            complaint = input("Enter complaint text: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No interactive stdin, or the user interrupted the prompt:
            # leave cleanly instead of surfacing a traceback.
            print("\nüëã Goodbye!")
            break

        if complaint.lower() == 'quit':
            print("\nüëã Goodbye!")
            break

        if len(complaint) < 10:
            print("‚ö†Ô∏è Complaint too short! Please enter at least 10 characters.\n")
            continue

        print("\nüîÑ Processing...")
        result = predict_dispute_resolution(complaint)
        print_prediction_result(result)

        print("\n")

print("‚úÖ Interactive interface created: interactive_predict()")
print("\nüìù To use the interactive predictor, run:")
print("   interactive_predict()")
print("\n‚ö†Ô∏è Note: This works in Jupyter notebooks with input support")
print("="*70)

üéÆ INTERACTIVE PREDICTION INTERFACE
‚úÖ Interactive interface created: interactive_predict()

üìù To use the interactive predictor, run:
   interactive_predict()

‚ö†Ô∏è Note: This works in Jupyter notebooks with input support


In [13]:
# ==================================================
# CELL 13: Complete Summary and Usage Guide
# ==================================================

print("="*70)
print(" ‚úÖ PREDICTION SYSTEM COMPLETE!")
print("="*70)

print("\nüéØ YOUR DISPUTE RESOLUTION MODEL:\n")

print(f"üìä Model Performance:")
print(f"   ‚Ä¢ Model: {metadata['model_name']}")
print(f"   ‚Ä¢ Accuracy: {metadata['accuracy']:.2%}")
print(f"   ‚Ä¢ F1-Score: {metadata['f1_score']:.4f}")
print(f"   ‚Ä¢ Training samples: {metadata['training_samples']:,}")

print(f"\nüé® Prediction Classes:")
for label in target_encoder.classes_:
    print(f"   ‚Ä¢ {label}")

print(f"\nüìù HOW TO USE THIS SYSTEM:\n")

print("1Ô∏è‚É£ SINGLE PREDICTION:")
print("   complaint = 'Your complaint text here'")
print("   result = predict_dispute_resolution(complaint)")
print("   print_prediction_result(result)")

print("\n2Ô∏è‚É£ BATCH PREDICTION:")
print("   complaints = ['complaint 1', 'complaint 2', 'complaint 3']")
print("   results_df = predict_batch_complaints(complaints)")
print("   print(results_df)")

print("\n3Ô∏è‚É£ SAVE PREDICTIONS:")
print("   save_predictions(results_df, 'my_predictions.csv')")

print("\n4Ô∏è‚É£ INTERACTIVE MODE:")
print("   interactive_predict()")

print("\n" + "="*70)
print("üéâ CONGRATULATIONS! YOUR MODEL IS READY FOR PRODUCTION!")
print("="*70)

print("\nüí° NEXT STEPS:\n")
print("   ‚Ä¢ Test with real complaints from your dataset")
print("   ‚Ä¢ Deploy as a web API (Flask/FastAPI)")
print("   ‚Ä¢ Create a user interface")
print("   ‚Ä¢ Monitor model performance over time")
print("   ‚Ä¢ Retrain with new data periodically")

print("\n" + "="*70)

 ‚úÖ PREDICTION SYSTEM COMPLETE!

üéØ YOUR DISPUTE RESOLUTION MODEL:

üìä Model Performance:
   ‚Ä¢ Model: Linear SVC
   ‚Ä¢ Accuracy: 81.92%
   ‚Ä¢ F1-Score: 0.7663
   ‚Ä¢ Training samples: 125,250

üé® Prediction Classes:
   ‚Ä¢ favor_seller
   ‚Ä¢ favour_customer
   ‚Ä¢ split_payment

üìù HOW TO USE THIS SYSTEM:

1Ô∏è‚É£ SINGLE PREDICTION:
   complaint = 'Your complaint text here'
   result = predict_dispute_resolution(complaint)
   print_prediction_result(result)

2Ô∏è‚É£ BATCH PREDICTION:
   complaints = ['complaint 1', 'complaint 2', 'complaint 3']
   results_df = predict_batch_complaints(complaints)
   print(results_df)

3Ô∏è‚É£ SAVE PREDICTIONS:
   save_predictions(results_df, 'my_predictions.csv')

4Ô∏è‚É£ INTERACTIVE MODE:
   interactive_predict()

üéâ CONGRATULATIONS! YOUR MODEL IS READY FOR PRODUCTION!

üí° NEXT STEPS:

   ‚Ä¢ Test with real complaints from your dataset
   ‚Ä¢ Deploy as a web API (Flask/FastAPI)
   ‚Ä¢ Create a user interface
   ‚Ä¢ Monitor model perf

In [14]:
# ==================================================
# CELL 14: Test with Real Data from Your Dataset
# ==================================================

print("üß™ TESTING WITH REAL DATA FROM YOUR DATASET")
print("="*70)

# Load a small sample from your cleaned data
try:
    print("\nüîÑ Loading sample from your cleaned dataset...")
    # nrows=10 reads the FIRST ten rows of the file, not a random sample.
    # NOTE(review): these rows were presumably also seen during training, so
    # the figure below is a smoke test, not a held-out accuracy estimate.
    df_test = pd.read_csv('../data/processed/complaints_cleaned.csv', nrows=10)

    print(f"‚úÖ Loaded {len(df_test)} sample complaints")

    # Get actual labels and predictions
    print("\nüîÑ Making predictions on real data...")

    comparison_results = []
    skipped_count = 0

    for idx, row in df_test.iterrows():
        # NOTE(review): the column name suggests this text is already
        # preprocessed; predict_dispute_resolution cleans it again - confirm
        # that this matches how the training features were built.
        complaint_text = row['complaint_text_processed']
        actual_label = row['dispute_resolution']

        # Make prediction
        result = predict_dispute_resolution(complaint_text)

        if 'error' in result:
            # Track rejected rows so they are not silently dropped.
            skipped_count += 1
            continue

        predicted_label = result['prediction']
        confidence = max(result['confidence_scores'].values())

        comparison_results.append({
            'actual': actual_label,
            'predicted': predicted_label,
            'match': '‚úÖ' if actual_label == predicted_label else '‚ùå',
            'confidence': f'{confidence:.2%}'
        })

    if comparison_results:
        comparison_df = pd.DataFrame(comparison_results)

        print("\nüìä ACTUAL vs PREDICTED:")
        print("="*70)
        print(comparison_df.to_string(index=False))

        # Calculate accuracy over the rows that were actually scored
        correct_count = (comparison_df['actual'] == comparison_df['predicted']).sum()
        accuracy = correct_count / len(comparison_df)
        print(f"\nüìà Sample Accuracy: {accuracy:.2%}")
    else:
        # With no scored rows the DataFrame has no 'match' column and the
        # accuracy would be a division by zero, so report it explicitly.
        print("\nNo complaints could be scored - every row was rejected by preprocessing.")

    if skipped_count:
        print(f"   Skipped {skipped_count} complaint(s) that were empty or too short.")

    print("\n" + "="*70)

except FileNotFoundError:
    print("\n‚ö†Ô∏è Could not find cleaned dataset file")
    print("   This is optional - you can skip this cell")

except Exception as e:
    print(f"\n‚ö†Ô∏è Error: {e}")

üß™ TESTING WITH REAL DATA FROM YOUR DATASET

üîÑ Loading sample from your cleaned dataset...
‚úÖ Loaded 10 sample complaints

üîÑ Making predictions on real data...

üìä ACTUAL vs PREDICTED:
         actual    predicted match confidence
favour_customer favor_seller     ‚ùå     56.01%
   favor_seller favor_seller     ‚úÖ     74.33%
   favor_seller favor_seller     ‚úÖ     77.70%
   favor_seller favor_seller     ‚úÖ     55.40%
   favor_seller favor_seller     ‚úÖ     62.79%
favour_customer favor_seller     ‚ùå     70.19%
   favor_seller favor_seller     ‚úÖ     75.11%
   favor_seller favor_seller     ‚úÖ     68.45%
   favor_seller favor_seller     ‚úÖ     76.83%
   favor_seller favor_seller     ‚úÖ     71.08%

üìà Sample Accuracy: 80.00%

