In [2]:
import pickle
import pandas as pd
import numpy as np
from datetime import datetime

def load_model_artifacts():
    """Load the pickled model, preprocessor and label encoder.

    The three files are read from the relative ``../models`` directory, so the
    result depends on the current working directory.

    Returns:
        tuple: ``(model, preprocessor, label_encoder)`` in that order.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # these files must come from a trusted source.
    artifact_paths = (
        '../models/final_model.pkl',
        '../models/preprocessor.pkl',
        '../models/label_encoder.pkl',
    )

    loaded = []
    for path in artifact_paths:
        with open(path, 'rb') as handle:
            loaded.append(pickle.load(handle))

    model, preprocessor, label_encoder = loaded
    return model, preprocessor, label_encoder

# Product names (matched case-insensitively as substrings) per coarse category.
_PRODUCT_CATEGORIES = {
    'debt': ['Debt Collection', 'Debt collection'],
    'credit': ['Credit card', 'Credit Card', 'Credit Reporting', 'Credit Report'],
    'mortgage': ['Mortgage'],
    'banking': ['Bank account or service', 'Bank Account Or Service', 'Checking Or Savings Account'],
    'loan': ['Consumer loan', 'Consumer Loan', 'Student Loan', 'Payday loan', 'Payday Loan']
}

# Two-letter state codes grouped by geographic region.
_STATE_REGIONS = {
    'northeast': ['ME', 'NH', 'VT', 'MA', 'RI', 'CT', 'NY', 'NJ', 'PA'],
    'midwest': ['OH', 'IN', 'IL', 'MI', 'WI', 'MN', 'IA', 'MO', 'ND', 'SD', 'NE', 'KS'],
    'south': ['DE', 'MD', 'DC', 'VA', 'WV', 'NC', 'SC', 'GA', 'FL', 'KY', 'TN', 'AL', 'MS', 'AR', 'LA', 'OK', 'TX'],
    'west': ['MT', 'ID', 'WY', 'CO', 'NM', 'AZ', 'UT', 'NV', 'CA', 'OR', 'WA', 'AK', 'HI']
}

# Keywords searched (case-insensitively) in the issue text; each yields a has_<keyword> flag.
_ISSUE_KEYWORDS = ['fraud', 'identity', 'payment', 'credit', 'debt', 'loan']


def _categorize_product(product):
    """Return the first category with a product name contained in *product*, else 'other'."""
    text = str(product).lower()
    for category, names in _PRODUCT_CATEGORIES.items():
        if any(name.lower() in text for name in names):
            return category
    return 'other'


def _region_for_state(state):
    """Return the region whose state list contains *state*, else 'unknown'."""
    for region, states in _STATE_REGIONS.items():
        if state in states:
            return region
    return 'unknown'


def _company_size(count):
    """Bucket a complaint count into 'small', 'medium', 'large' or 'enterprise'."""
    if count <= 10:
        return 'small'
    if count <= 50:
        return 'medium'
    if count <= 200:
        return 'large'
    return 'enterprise'


def _text_length(series):
    """Return the per-row length of ``str(value)`` as an integer Series."""
    return series.map(lambda value: len(str(value))).astype(int)


def create_all_features(df):
    """Add the engineered feature columns to a copy of *df*.

    Every row-dependent feature (product category, region, text lengths and
    keyword flags) is computed per row, so multi-row and empty DataFrames are
    handled; the input frame itself is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        Complaint rows. Columns read when present: ``year_received``,
        ``processing_days``, ``Product``, ``State``, ``Issue``, ``Sub-issue``.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with the additional feature columns.
    """
    df_processed = df.copy()

    # 1. Temporal features (only created when the reception year is known).
    if 'year_received' in df_processed.columns:
        # Only the year is available, so the date-derived features fall back
        # to fixed defaults: January, Monday, first quarter.
        df_processed['month_received'] = 1
        df_processed['dayofweek_received'] = 1
        df_processed['quarter_received'] = 1
        df_processed['is_weekend'] = 0
        df_processed['is_holiday_season'] = 0

        # Processing time: default of 2 days when the caller did not supply it.
        if 'processing_days' not in df_processed.columns:
            df_processed['processing_days'] = 2
        df_processed['same_day_processing'] = (df_processed['processing_days'] == 0).astype(int)

    # 2. Default values for optional columns that are absent.
    column_defaults = {
        'Sub-product': 'Not specified',
        'Sub-issue': 'Not specified',
        'ZIP code': '00000',
        'Consumer disputed?': 'No',
        'Timely response?': 'Yes',
    }
    for column, default in column_defaults.items():
        if column not in df_processed.columns:
            df_processed[column] = default

    # 3. Categorical features, derived row by row.
    if 'Product' in df_processed.columns:
        df_processed['product_category'] = df_processed['Product'].map(_categorize_product)
    else:
        df_processed['product_category'] = 'other'

    if 'State' in df_processed.columns:
        df_processed['region'] = df_processed['State'].map(_region_for_state)
    else:
        df_processed['region'] = 'unknown'

    # 4. Aggregate features: fixed placeholder counts, since no per-company or
    #    per-state statistics are available here.
    df_processed['company_complaint_count'] = 50
    df_processed['company_size'] = df_processed['company_complaint_count'].map(_company_size)
    df_processed['state_complaint_count'] = 100

    # 5. Text-length features.
    if 'Issue' in df_processed.columns:
        issue_lower = df_processed['Issue'].map(lambda value: str(value).lower())
        df_processed['issue_length'] = _text_length(df_processed['Issue'])
    else:
        issue_lower = None
        df_processed['issue_length'] = 20

    # 'Sub-issue' always exists at this point because of the defaults above.
    df_processed['sub-issue_length'] = _text_length(df_processed['Sub-issue'])

    # 6. Keyword flags from the issue text (all zero when there is no Issue column).
    for keyword in _ISSUE_KEYWORDS:
        if issue_lower is None:
            df_processed[f'has_{keyword}'] = 0
        else:
            df_processed[f'has_{keyword}'] = issue_lower.map(
                lambda text, kw=keyword: int(kw in text)
            ).astype(int)

    return df_processed

def predict_complaint_response(complaint_data):
    """Predict the company response for a complaint.

    The model artifacts are loaded, the engineered features are added, the
    data is run through the preprocessor and the model, and the prediction
    for the first row is decoded back to its original label.

    Parameters
    ----------
    complaint_data : dict or pd.DataFrame
        Complaint data. Minimum expected fields: Product, Issue, State,
        Company, year_received. A dict is treated as a single row. When a
        DataFrame is given, only its first row is reported in the result.

    Returns
    -------
    dict
        ``predicted_response`` (decoded label), ``confidence`` (highest class
        probability), ``probabilities`` (label -> probability) and
        ``features_used`` (column names passed to the preprocessor).

    Raises
    ------
    Exception
        Any error raised by ``preprocessor.transform`` is re-raised after
        diagnostic information has been printed.
    """
    model, preprocessor, label_encoder = load_model_artifacts()

    # Normalize the input to a DataFrame without touching the caller's object.
    if isinstance(complaint_data, dict):
        df = pd.DataFrame([complaint_data])
    else:
        df = complaint_data.copy()

    df_with_features = create_all_features(df)

    try:
        X_processed = preprocessor.transform(df_with_features)
    except Exception as e:
        print(f"Error en preprocesamiento: {e}")
        print(f"Columnas disponibles: {list(df_with_features.columns)}")
        # Not every transformer exposes feature_names_in_; reading it blindly
        # could raise AttributeError here and hide the real failure.
        expected_columns = getattr(preprocessor, 'feature_names_in_', None)
        if expected_columns is not None:
            print(f"Columnas esperadas por el preprocesador: {expected_columns}")
        raise

    prediction = model.predict(X_processed)[0]
    probabilities = model.predict_proba(X_processed)[0]

    # Map the encoded prediction back to its original label.
    predicted_class = label_encoder.inverse_transform([prediction])[0]

    # The columns of predict_proba follow model.classes_, which is not
    # necessarily 0..n-1; fall back to positional order only when the model
    # does not expose classes_.
    encoded_classes = getattr(model, 'classes_', None)
    if encoded_classes is None:
        encoded_classes = range(len(probabilities))
    class_names = label_encoder.inverse_transform(list(encoded_classes))
    prob_dict = {
        class_name: float(prob)
        for class_name, prob in zip(class_names, probabilities)
    }

    return {
        'predicted_response': predicted_class,
        'confidence': float(max(probabilities)),
        'probabilities': prob_dict,
        'features_used': list(df_with_features.columns)
    }

# Funci√≥n auxiliar para crear ejemplos de prueba v√°lidos
def create_complaint_example(product, issue, state, company, year_received=2023, processing_days=None):
    """Build a complaint dict with the minimal structure used for prediction.

    Parameters
    ----------
    product : str
        Product type (e.g. 'Credit card', 'Mortgage', 'Debt collection').
    issue : str
        Description of the problem.
    state : str
        Two-letter state code (e.g. 'CA', 'TX', 'NY').
    company : str
        Company name.
    year_received : int
        Year the complaint was received (default: 2023).
    processing_days : int, optional
        Processing time in days. When ``None`` the ``processing_days`` key is
        left out of the returned dict.

    Returns
    -------
    dict
        Keys ``Product``, ``Issue``, ``State``, ``Company``, ``year_received``
        and, when provided, ``processing_days``.
    """
    field_names = ('Product', 'Issue', 'State', 'Company', 'year_received')
    field_values = (product, issue, state, company, year_received)
    example = dict(zip(field_names, field_values))

    if processing_days is None:
        return example

    example['processing_days'] = processing_days
    return example

# Keywords used to classify a response label, in matching order.
# "without relief" must come before "relief" because it contains it.
_RESPONSE_KINDS = ("explanation", "progress", "without relief", "relief", "untimely")

# Marker printed next to each response in the probability chart.
_RESPONSE_EMOJI = {
    "explanation": "‚úÖ",
    "progress": "‚è≥",
    "without relief": "‚ùå",
    "relief": "üí∞",
    "untimely": "‚è∞",
    "other": "üìã",
}

# Sentence printed in the interpretation section for the top response.
_RESPONSE_INTERPRETATION = {
    "explanation": "La empresa probablemente cerrar√° la queja proporcionando una explicaci√≥n detallada.",
    "progress": "La queja est√° siendo procesada y a√∫n no tiene resoluci√≥n final.",
    "without relief": "La empresa cerrar√° la queja sin ofrecer compensaci√≥n.",
    "relief": "La empresa probablemente ofrecer√° alg√∫n tipo de compensaci√≥n o soluci√≥n.",
    "untimely": "La empresa no responder√° dentro del tiempo establecido.",
    "other": "Resultado est√°ndar de procesamiento de queja.",
}


def _response_kind(response):
    """Return the first keyword of _RESPONSE_KINDS found in *response*, else 'other'."""
    text = response.lower()
    for kind in _RESPONSE_KINDS:
        if kind in text:
            return kind
    return "other"


def display_prediction_results(complaint_data, result, show_features=False):
    """Print a formatted report for a single prediction.

    Parameters
    ----------
    complaint_data : dict
        The complaint that was scored; only the keys Product, Issue, State,
        Company and year_received are printed.
    result : dict
        Prediction dict with ``predicted_response``, ``confidence`` and
        ``probabilities`` (label -> probability); ``features_used`` is
        optional.
    show_features : bool
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        The *result* object, unchanged.
    """
    from datetime import datetime

    print("\n" + "="*80)
    print("üîÆ PREDICCI√ìN DE RESPUESTA DE QUEJA FINANCIERA")
    print("="*80)

    # Complaint details
    print("\nüìã DATOS DE LA QUEJA:")
    print("-" * 50)
    for key, value in complaint_data.items():
        if key in ['Product', 'Issue', 'State', 'Company', 'year_received']:
            print(f"   üî∏ {key:<20}: {value}")

    # Main prediction
    print(f"\nüéØ PREDICCI√ìN PRINCIPAL:")
    print("-" * 50)
    if result['confidence'] > 0.7:
        confidence_level = "üü¢ ALTA"
    elif result['confidence'] > 0.5:
        confidence_level = "üü° MEDIA"
    else:
        confidence_level = "üî¥ BAJA"
    print(f"   üìù Respuesta predicha  : {result['predicted_response']}")
    print(f"   üìä Confianza          : {result['confidence']:.1%} ({confidence_level})")

    # Probability distribution, most likely response first
    print(f"\nüìà DISTRIBUCI√ìN DE PROBABILIDADES:")
    print("-" * 50)
    sorted_probs = sorted(result['probabilities'].items(), key=lambda x: x[1], reverse=True)

    for response, prob in sorted_probs:
        # Text bar of up to 40 characters, proportional to the probability.
        bar_length = int(prob * 40)
        bar = "‚ñà" * bar_length + "‚ñë" * (40 - bar_length)
        emoji = _RESPONSE_EMOJI[_response_kind(response)]
        print(f"   {emoji} {response:<25}: {bar} {prob:.1%}")

    # Interpretation of the most likely response
    print(f"\nüí° INTERPRETACI√ìN:")
    print("-" * 50)

    top_response = sorted_probs[0][0]
    top_prob = sorted_probs[0][1]
    top_lower = top_response.lower()

    interpretation = _RESPONSE_INTERPRETATION[_response_kind(top_response)]
    print(f"   {interpretation}")

    # Key factors
    print(f"\nüîç FACTORES CLAVE IDENTIFICADOS:")
    print("-" * 50)

    if 'features_used' in result:
        features = result['features_used']

        key_features = []
        if 'product_category' in features:
            key_features.append("Categor√≠a de producto identificada")
        if 'region' in features:
            key_features.append("Regi√≥n geogr√°fica considerada")
        if any('has_' in f for f in features):
            key_features.append("Palabras clave detectadas en el problema")
        if 'processing_days' in features:
            key_features.append("Tiempo de procesamiento evaluado")

        for feature in key_features[:5]:  # show at most 5
            print(f"   ‚úì {feature}")

    # Recommendations
    print(f"\nüíº RECOMENDACIONES:")
    print("-" * 50)

    if result['confidence'] < 0.5:
        print("   ‚ö†Ô∏è  Confianza baja - Considere proporcionar m√°s detalles espec√≠ficos")
    elif "untimely" in top_lower:
        print("   üìû Seguimiento activo recomendado debido a posible retraso")
    elif "relief" in top_lower and "without relief" not in top_lower:
        # A "without relief" outcome is not a favourable resolution.
        print("   üí∞ Alta probabilidad de resoluci√≥n favorable")
    else:
        print("   üìã Proceder con expectativas est√°ndar de resoluci√≥n")

    # Comparison with the runner-up
    if len(sorted_probs) > 1:
        second_prob = sorted_probs[1][1]
        difference = top_prob - second_prob

        if difference < 0.2:  # less than 20 percentage points apart
            print("   ‚öñÔ∏è  Resultado competido - Considere escenarios alternativos")
            print(f"      Segunda opci√≥n m√°s probable: {sorted_probs[1][0]} ({second_prob:.1%})")

    # Timestamp
    print(f"\n‚è∞ An√°lisis realizado: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("="*80)

    return result

def _confidence_marker(confidence):
    """Return the marker for a confidence value (> 0.7, > 0.5, otherwise)."""
    if confidence > 0.7:
        return "üü¢"
    if confidence > 0.5:
        return "üü°"
    return "üî¥"


def batch_predict_complaints(complaints_list, show_summary=True):
    """Predict several complaints and optionally print a comparative summary.

    Parameters
    ----------
    complaints_list : list of dict
        Complaints to score; each one is passed to
        ``predict_complaint_response``.
    show_summary : bool
        When true and at least one complaint was processed, print the
        comparison table and the batch statistics.

    Returns
    -------
    list of tuple
        ``(complaint, result)`` pairs in input order; empty for an empty batch.
    """
    from collections import Counter

    results = []

    print("\n" + "="*80)
    print("üìä AN√ÅLISIS BATCH DE QUEJAS FINANCIERAS")
    print("="*80)

    for i, complaint in enumerate(complaints_list, 1):
        print(f"\nüîç PROCESANDO QUEJA {i}/{len(complaints_list)}...")
        result = predict_complaint_response(complaint)
        results.append((complaint, result))

        # One-line result for this complaint
        confidence_emoji = _confidence_marker(result['confidence'])
        print(f"   {confidence_emoji} {result['predicted_response']} ({result['confidence']:.1%})")

    # With no results there is nothing to summarize, and max()/min() would
    # raise on the empty list.
    if show_summary and results:
        print(f"\nüìã RESUMEN COMPARATIVO:")
        print("-" * 80)
        print(f"{'#':<3} {'Producto':<15} {'Estado':<5} {'Predicci√≥n':<25} {'Confianza':<10}")
        print("-" * 80)

        for i, (complaint, result) in enumerate(results, 1):
            # str() keeps slicing and alignment working for non-string values.
            product_short = str(complaint.get('Product', 'N/A'))[:14]
            state = str(complaint.get('State', 'N/A'))
            prediction_short = result['predicted_response'][:24]
            confidence = f"{result['confidence']:.1%}"

            confidence_color = _confidence_marker(result['confidence'])

            print(f"{i:<3} {product_short:<15} {state:<5} {prediction_short:<25} {confidence_color} {confidence:<9}")

        # Overall statistics
        confidences = [result['confidence'] for _, result in results]
        responses = [result['predicted_response'] for _, result in results]

        print(f"\nüìà ESTAD√çSTICAS DEL BATCH:")
        print("-" * 50)
        print(f"   Total quejas procesadas: {len(results)}")
        print(f"   Confianza promedio     : {np.mean(confidences):.1%}")
        print(f"   Confianza m√°xima       : {max(confidences):.1%}")
        print(f"   Confianza m√≠nima       : {min(confidences):.1%}")

        # Distribution of predicted responses, most frequent first
        response_counts = Counter(responses)
        print(f"\n   Distribuci√≥n de respuestas predichas:")
        for response, count in response_counts.most_common():
            pct = (count / len(results)) * 100
            print(f"     ‚Ä¢ {response}: {count} ({pct:.1f}%)")

    return results

In [3]:
# Usage examples: one detailed single prediction and one batch run.
if __name__ == "__main__":
    print("üöÄ INICIANDO SISTEMA DE PREDICCI√ìN DE QUEJAS FINANCIERAS")

    # Tracks whether every example ran, so the success banner is only shown
    # when nothing failed.
    all_succeeded = True

    # Example 1: single prediction with the full report
    complaint_1 = create_complaint_example(
        product='Credit card',
        issue='Billing disputes and fraudulent charges on my account',
        state='CA',
        company='Big Bank Corp',
        year_received=2023,
        processing_days=2
    )

    try:
        result_1 = predict_complaint_response(complaint_1)
        display_prediction_results(complaint_1, result_1)
    except Exception as e:
        all_succeeded = False
        print(f"‚ùå Error en predicci√≥n individual: {e}")

    # Example 2: batch analysis
    complaints_batch = [
        create_complaint_example('Credit card', 'Billing disputes', 'CA', 'Bank A'),
        create_complaint_example('Mortgage', 'Application delays', 'TX', 'Lender B'),
        create_complaint_example('Debt collection', 'Debt not owed', 'NY', 'Collector C'),
        create_complaint_example('Bank account or service', 'Unauthorized charges', 'FL', 'Bank D')
    ]

    try:
        batch_results = batch_predict_complaints(complaints_batch)
    except Exception as e:
        all_succeeded = False
        print(f"‚ùå Error en an√°lisis batch: {e}")

    if all_succeeded:
        print(f"\n‚úÖ AN√ÅLISIS COMPLETADO EXITOSAMENTE")

üöÄ INICIANDO SISTEMA DE PREDICCI√ìN DE QUEJAS FINANCIERAS





üîÆ PREDICCI√ìN DE RESPUESTA DE QUEJA FINANCIERA

üìã DATOS DE LA QUEJA:
--------------------------------------------------
   üî∏ Product             : Credit card
   üî∏ Issue               : Billing disputes and fraudulent charges on my account
   üî∏ State               : CA
   üî∏ Company             : Big Bank Corp
   üî∏ year_received       : 2023

üéØ PREDICCI√ìN PRINCIPAL:
--------------------------------------------------
   üìù Respuesta predicha  : Closed with explanation
   üìä Confianza          : 53.0% (üü° MEDIA)

üìà DISTRIBUCI√ìN DE PROBABILIDADES:
--------------------------------------------------
   ‚úÖ Closed with explanation  : ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 53.0%
   üí∞ Closed with monetary relief: ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë 19.0%
   üí∞ Closed with non-monetary relie



   üü° Closed with explanation (55.0%)

üîç PROCESANDO QUEJA 3/4...




   üü° Closed with explanation (55.0%)

üîç PROCESANDO QUEJA 4/4...
   üü° Closed with explanation (55.0%)

üìã RESUMEN COMPARATIVO:
--------------------------------------------------------------------------------
#   Producto        Estado Predicci√≥n                Confianza 
--------------------------------------------------------------------------------
1   Credit card     CA    Closed with explanation   üü° 56.0%    
2   Mortgage        TX    Closed with explanation   üü° 55.0%    
3   Debt collectio  NY    Closed with explanation   üü° 55.0%    
4   Bank account o  FL    Closed with explanation   üü° 55.0%    

üìà ESTAD√çSTICAS DEL BATCH:
--------------------------------------------------
   Total quejas procesadas: 4
   Confianza promedio     : 55.2%
   Confianza m√°xima       : 56.0%
   Confianza m√≠nima       : 55.0%

   Distribuci√≥n de respuestas predichas:
     ‚Ä¢ Closed with explanation: 4 (100.0%)

‚úÖ AN√ÅLISIS COMPLETADO EXITOSAMENTE


