# Modelos Baseline y Evaluación

Este notebook implementa modelos baseline para la clasificación de toxicidad,
basados en los datos preprocesados por el pipeline del notebook `andrea-mode-preprocessing.ipynb`.

**Objetivos**:
- Cargar datos preprocesados y divididos
- Implementar modelos baseline simples
- Sistema de evaluación exhaustivo
- Comparación inicial de enfoques
- Análisis de resultados y próximos pasos

## Librerías

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Librerías para modeling
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score, 
    roc_curve, precision_recall_curve, f1_score, accuracy_score,
    multilabel_confusion_matrix, average_precision_score
)

# Modelos baseline
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.dummy import DummyClassifier

# Vectorización de texto
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import TruncatedSVD

# Utilidades
import pickle
import json
import os
from collections import defaultdict
import time
from datetime import datetime

# Configure plotting and pandas display defaults for the whole notebook
plt.style.use('default')
sns.set_palette("husl")
# Show every column and print floats with four decimals in DataFrame output
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.4f}'.format)

print("📚 Librerías cargadas exitosamente")

## Carga de datos preprocesados

In [None]:
class DataLoader:
    """Load the artefacts written to disk by the preprocessing step.

    Every loader is best-effort: on failure it prints a message and returns
    ``None`` instead of raising, so the notebook can continue with whatever
    data is available.
    """

    # Dataset folders tried by ``load_all_datasets`` when no names are given.
    DEFAULT_DATASETS = ('numeric_features', 'text_processed', 'text_cleaned', 'combined')

    def __init__(self, data_dir='../processed_data'):
        """Remember where the processed data lives; nothing is read yet.

        Args:
            data_dir: Directory holding ``metadata.json``, ``scaler.pkl`` and
                one sub-directory per dataset.
        """
        self.data_dir = data_dir
        self.metadata = None  # dict parsed from metadata.json, once loaded
        self.datasets = {}    # dataset name -> {split name -> DataFrame}
        self.scaler = None    # object unpickled from scaler.pkl, once loaded

    def load_metadata(self):
        """Read ``metadata.json`` into ``self.metadata``.

        Returns:
            The parsed metadata, or ``None`` if the file cannot be read.
        """
        metadata_path = os.path.join(self.data_dir, 'metadata.json')
        try:
            # Explicit UTF-8 so the result does not depend on the platform's
            # default text encoding.
            with open(metadata_path, 'r', encoding='utf-8') as f:
                self.metadata = json.load(f)
            print("📥 Metadatos cargados exitosamente")
            return self.metadata
        except Exception as e:
            print(f"❌ Error cargando metadatos: {e}")
            return None

    def load_scaler(self):
        """Unpickle ``scaler.pkl`` into ``self.scaler``.

        Returns:
            The unpickled object, or ``None`` if it cannot be loaded.
        """
        scaler_path = os.path.join(self.data_dir, 'scaler.pkl')
        try:
            # SECURITY: pickle.load can execute arbitrary code. Only point
            # data_dir at files produced by a trusted preprocessing run.
            with open(scaler_path, 'rb') as f:
                self.scaler = pickle.load(f)
            print("📥 Scaler cargado exitosamente")
            return self.scaler
        except Exception as e:
            print(f"❌ Error cargando scaler: {e}")
            return None

    def load_dataset(self, dataset_name):
        """Load every CSV split of one dataset folder.

        A folder containing ``X_numeric_train.csv`` is treated as the combined
        layout (``X_numeric_*`` and ``X_text_*``); any other folder is treated
        as the regular layout (``X_train`` / ``X_test``). Scaled feature files
        are optional and are loaded only when both the train and the test
        file exist, so a half-written pair does not discard the whole dataset.

        Args:
            dataset_name: Name of the sub-directory inside ``data_dir``.

        Returns:
            Dict mapping split name to DataFrame, or ``None`` on failure.
        """
        dataset_path = os.path.join(self.data_dir, dataset_name)

        if not os.path.isdir(dataset_path):
            print(f"❌ Dataset {dataset_name} no encontrado")
            return None

        def read_split(split_name):
            return pd.read_csv(os.path.join(dataset_path, f'{split_name}.csv'))

        try:
            files = set(os.listdir(dataset_path))

            # Detect the layout from the files that are present.
            if 'X_numeric_train.csv' in files:
                feature_prefixes = ('X_numeric', 'X_text')
                scalable_prefix = 'X_numeric'
            else:
                feature_prefixes = ('X',)
                scalable_prefix = 'X'

            dataset = {}
            for prefix in feature_prefixes:
                for part in ('train', 'test'):
                    dataset[f'{prefix}_{part}'] = read_split(f'{prefix}_{part}')

            scaled_names = [f'{scalable_prefix}_{part}_scaled' for part in ('train', 'test')]
            if all(f'{name}.csv' in files for name in scaled_names):
                for name in scaled_names:
                    dataset[name] = read_split(name)

            # Labels are shared by both layouts.
            for part in ('train', 'test'):
                dataset[f'y_{part}'] = read_split(f'y_{part}')

            self.datasets[dataset_name] = dataset
            print(f"📥 Dataset '{dataset_name}' cargado exitosamente")
            return dataset

        except Exception as e:
            print(f"❌ Error cargando dataset {dataset_name}: {e}")
            return None

    def load_all_datasets(self, dataset_names=None):
        """Try to load several datasets, skipping those that fail.

        Args:
            dataset_names: Iterable of folder names; defaults to
                ``DEFAULT_DATASETS``.

        Returns:
            ``self.datasets``, containing only the datasets that loaded.
        """
        if dataset_names is None:
            dataset_names = self.DEFAULT_DATASETS

        for name in dataset_names:
            self.load_dataset(name)

        print(f"📥 {len(self.datasets)} datasets cargados")
        return self.datasets

    def get_dataset_info(self):
        """Print the shape of every loaded split and the label column names."""
        if not self.metadata:
            self.load_metadata()

        print("📊 INFORMACIÓN DE DATASETS:")
        print("-" * 50)

        for name, dataset in self.datasets.items():
            print(f"\n🔹 {name.upper()}:")
            if 'X_train' in dataset:
                print(f"   • X_train: {dataset['X_train'].shape}")
                print(f"   • X_test: {dataset['X_test'].shape}")
            else:
                print(f"   • X_numeric_train: {dataset['X_numeric_train'].shape}")
                print(f"   • X_numeric_test: {dataset['X_numeric_test'].shape}")
                print(f"   • X_text_train: {dataset['X_text_train'].shape}")
                print(f"   • X_text_test: {dataset['X_text_test'].shape}")

            print(f"   • y_train: {dataset['y_train'].shape}")
            print(f"   • Etiquetas: {list(dataset['y_train'].columns)}")

# Create the loader and read everything available under its data directory.
# The names bound here (loader, metadata, scaler, datasets) are notebook-level
# globals used by later cells.
loader = DataLoader()
metadata = loader.load_metadata()
scaler = loader.load_scaler()
datasets = loader.load_all_datasets()
loader.get_dataset_info()

## Sistema de evaluación

In [None]:
class ModelEvaluator:
    """Metrics, cross-validation and plots shared by the baseline experiments."""

    def __init__(self, target_labels=None):
        """Store the label names to evaluate.

        Args:
            target_labels: Label column names; defaults to ``['IsToxic']``
                when ``None`` or empty.
        """
        self.target_labels = target_labels or ['IsToxic']
        self.results = {}

    def evaluate_binary_classification(self, y_true, y_pred, y_prob=None, model_name="Model"):
        """Compute the standard metrics for one binary target.

        Args:
            y_true: True labels.
            y_pred: Predicted labels.
            y_prob: Optional positive-class scores; enables the ROC AUC and
                average-precision entries.
            model_name: Accepted for call-site symmetry; not used here.

        Returns:
            Dict with ``accuracy``, ``f1``, ``f1_macro``, ``f1_weighted``,
            ``classification_report`` (dict form) and ``confusion_matrix``,
            plus ``roc_auc`` / ``avg_precision`` when ``y_prob`` is given
            (NaN if they cannot be computed).
        """
        results = {
            'accuracy': accuracy_score(y_true, y_pred),
            'f1': f1_score(y_true, y_pred),
            'f1_macro': f1_score(y_true, y_pred, average='macro'),
            'f1_weighted': f1_score(y_true, y_pred, average='weighted'),
        }

        if y_prob is not None:
            try:
                results['roc_auc'] = roc_auc_score(y_true, y_prob)
                results['avg_precision'] = average_precision_score(y_true, y_prob)
            except ValueError:
                # Ranking metrics are undefined for some inputs (for example
                # a single class in y_true); report NaN rather than abort.
                # ValueError only, so KeyboardInterrupt still propagates.
                results['roc_auc'] = np.nan
                results['avg_precision'] = np.nan

        results['classification_report'] = classification_report(
            y_true, y_pred, output_dict=True
        )
        results['confusion_matrix'] = confusion_matrix(y_true, y_pred)

        return results

    def evaluate_multilabel_classification(self, y_true, y_pred, y_prob=None, model_name="Model"):
        """Evaluate each target label separately, then aggregate.

        Prediction columns are matched to labels through the label's position
        in ``y_true.columns``. This is the same alignment the aggregated
        metrics below rely on, and it stays correct when some entries of
        ``self.target_labels`` are missing from ``y_true``.

        Args:
            y_true: DataFrame with one column per label.
            y_pred: Predicted labels, 2-D (samples x labels) or 1-D.
            y_prob: Optional scores with the same layout as ``y_pred``.
            model_name: Prefix for the per-label model name.

        Returns:
            Dict with ``label_results`` (label -> binary metrics) and, for 2-D
            predictions, micro/macro/weighted F1 and micro/macro ROC AUC.
        """
        results = {}

        pred_matrix = np.asarray(y_pred)
        prob_matrix = None if y_prob is None else np.asarray(y_prob)
        true_columns = list(y_true.columns)

        label_results = {}
        for label in self.target_labels:
            if label not in true_columns:
                continue
            column = true_columns.index(label)
            label_pred = pred_matrix[:, column] if pred_matrix.ndim > 1 else pred_matrix
            if prob_matrix is not None and prob_matrix.ndim > 1:
                label_prob = prob_matrix[:, column]
            else:
                label_prob = prob_matrix
            label_results[label] = self.evaluate_binary_classification(
                y_true[label], label_pred, label_prob, f"{model_name}_{label}"
            )

        results['label_results'] = label_results

        # Aggregated metrics only make sense for a full prediction matrix.
        if pred_matrix.ndim > 1:
            results['f1_micro'] = f1_score(y_true, y_pred, average='micro')
            results['f1_macro'] = f1_score(y_true, y_pred, average='macro')
            results['f1_weighted'] = f1_score(y_true, y_pred, average='weighted')

            if y_prob is not None:
                try:
                    results['roc_auc_micro'] = roc_auc_score(y_true, y_prob, average='micro')
                    results['roc_auc_macro'] = roc_auc_score(y_true, y_prob, average='macro')
                except ValueError:
                    results['roc_auc_micro'] = np.nan
                    results['roc_auc_macro'] = np.nan

        return results

    def cross_validate_model(self, model, X, y, cv_folds=5, scoring='f1'):
        """Run stratified, shuffled k-fold cross-validation.

        When ``y`` has several columns only the first one is used as target.

        Args:
            model: Estimator passed to ``cross_val_score``.
            X: Feature matrix.
            y: Target vector, or a 2-D target whose first column is used.
            cv_folds: Number of folds.
            scoring: Scoring name forwarded to ``cross_val_score``.

        Returns:
            Dict with ``cv_mean``, ``cv_std`` and the raw ``cv_scores``.
        """
        if len(y.shape) > 1 and y.shape[1] > 1:
            y_cv = y.iloc[:, 0] if hasattr(y, 'iloc') else y[:, 0]
        else:
            y_cv = y

        splitter = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)
        cv_scores = cross_val_score(model, X, y_cv, cv=splitter, scoring=scoring)

        return {
            'cv_mean': cv_scores.mean(),
            'cv_std': cv_scores.std(),
            'cv_scores': cv_scores
        }

    def plot_confusion_matrix(self, cm, title="Confusion Matrix", labels=None):
        """Draw a confusion matrix as an annotated heatmap.

        Args:
            cm: Integer confusion matrix.
            title: Figure title.
            labels: Tick labels; defaults to the non-toxic / toxic pair when
                ``None`` or empty.
        """
        # Explicit emptiness test: ``labels or default`` raises on a NumPy
        # array because its truth value is ambiguous.
        if labels is None or len(labels) == 0:
            tick_labels = ['No Tóxico', 'Tóxico']
        else:
            tick_labels = labels

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=tick_labels,
                    yticklabels=tick_labels)
        plt.title(title)
        plt.ylabel('Etiqueta Real')
        plt.xlabel('Predicción')
        plt.tight_layout()
        plt.show()

    def plot_roc_curves(self, results_dict, target_label='IsToxic'):
        """Overlay the ROC curves of every result that stores ``fpr``/``tpr``.

        Args:
            results_dict: Mapping of model name to its result dict.
            target_label: Text appended to the figure title.
        """
        plt.figure(figsize=(10, 8))

        for model_name, result in results_dict.items():
            if 'fpr' in result and 'tpr' in result:
                auc_score = result.get('roc_auc', 0)
                plt.plot(result['fpr'], result['tpr'],
                         label=f'{model_name} (AUC = {auc_score:.3f})',
                         linewidth=2)

        # Diagonal = performance of a random classifier.
        plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curves - {target_label}')
        plt.legend(loc="lower right")
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

# Create the shared evaluator; use the labels recorded in the metadata when it
# was loaded, otherwise fall back to the single 'IsToxic' label.
evaluator = ModelEvaluator(target_labels=metadata['valid_labels'] if metadata else ['IsToxic'])

## Modelos baseline

In [None]:
class BaselineModels:
    """Collection of baseline models for toxicity classification.

    The training methods rely on the notebook-level ``evaluator`` object for
    metrics and cross-validation.
    """

    def __init__(self, random_state=42):
        """Args:
            random_state: Seed forwarded to every estimator that accepts one.
        """
        self.random_state = random_state
        self.models = {}
        self.vectorizers = {}  # '<vectorizer>_<label>' -> fitted vectorizer
        self.results = {}      # '<kind>_<label>' -> {model name -> result dict}

    def get_dummy_models(self):
        """Return trivial reference classifiers used as a comparison floor."""
        return {
            'dummy_most_frequent': DummyClassifier(strategy='most_frequent', random_state=self.random_state),
            'dummy_stratified': DummyClassifier(strategy='stratified', random_state=self.random_state),
            'dummy_uniform': DummyClassifier(strategy='uniform', random_state=self.random_state)
        }

    def get_text_vectorizers(self, max_features=5000):
        """Return unfitted TF-IDF and count vectorizers with shared settings.

        Args:
            max_features: Vocabulary size cap for both vectorizers.
        """
        return {
            'tfidf': TfidfVectorizer(
                max_features=max_features,
                ngram_range=(1, 2),
                min_df=2,
                max_df=0.95,
                stop_words='english'
            ),
            'count': CountVectorizer(
                max_features=max_features,
                ngram_range=(1, 2),
                min_df=2,
                max_df=0.95,
                stop_words='english'
            )
        }

    def get_numeric_models(self):
        """Return unfitted estimators for the numeric feature set."""
        return {
            'logistic_numeric': LogisticRegression(
                random_state=self.random_state,
                max_iter=1000,
                class_weight='balanced'
            ),
            'rf_numeric': RandomForestClassifier(
                n_estimators=100,
                random_state=self.random_state,
                class_weight='balanced'
            ),
            'nb_gaussian': GaussianNB()
        }

    def get_text_models(self):
        """Return unfitted estimators for vectorized text."""
        return {
            'logistic_text': LogisticRegression(
                random_state=self.random_state,
                max_iter=1000,
                class_weight='balanced'
            ),
            'rf_text': RandomForestClassifier(
                n_estimators=100,
                random_state=self.random_state,
                class_weight='balanced'
            ),
            'nb_multinomial': MultinomialNB(alpha=1.0),
            'svm_text': SVC(
                kernel='linear',
                random_state=self.random_state,
                class_weight='balanced',
                probability=True
            )
        }

    def _fit_and_evaluate(self, name, model, X_tr, y_tr, X_te, y_te):
        """Fit one model and collect test metrics, CV scores and ROC points.

        ``training_time`` covers only the ``fit`` call, so it is not inflated
        by prediction, metric computation or cross-validation.

        Returns:
            The result dict for this model. Exceptions propagate to the caller.
        """
        fit_start = time.perf_counter()
        model.fit(X_tr, y_tr)
        training_time = time.perf_counter() - fit_start

        y_pred = model.predict(X_te)

        # Positive-class probability when available, otherwise the raw
        # decision score; either works for ROC-type metrics.
        y_score = None
        if hasattr(model, 'predict_proba'):
            y_score = model.predict_proba(X_te)[:, 1]
        elif hasattr(model, 'decision_function'):
            y_score = model.decision_function(X_te)

        result = evaluator.evaluate_binary_classification(y_te, y_pred, y_score, name)
        result.update(evaluator.cross_validate_model(model, X_tr, y_tr))
        result['training_time'] = training_time

        if y_score is not None:
            fpr, tpr, _ = roc_curve(y_te, y_score)
            result['fpr'] = fpr
            result['tpr'] = tpr

        return result

    def train_numeric_models(self, X_train, y_train, X_test, y_test,
                           use_scaled=True, target_label='IsToxic',
                           X_train_scaled=None, X_test_scaled=None):
        """Train and evaluate the numeric and dummy models on one label.

        Args:
            X_train, X_test: Unscaled feature frames.
            y_train, y_test: Label frames containing ``target_label``.
            use_scaled: Prefer scaled features when they are available.
            target_label: Label column to predict.
            X_train_scaled, X_test_scaled: Optional scaled feature frames.
                When omitted and ``use_scaled`` is true, the notebook-level
                ``datasets['numeric_features']`` is consulted for backward
                compatibility. If no scaled pair is found the unscaled
                frames are used.

        Returns:
            Dict of model name -> result dict (``{'error': msg}`` on failure);
            also stored in ``self.results['numeric_<label>']``.
        """
        print(f"🔢 Entrenando modelos numéricos para {target_label}...")

        X_tr, X_te = X_train, X_test
        if use_scaled:
            if X_train_scaled is not None and X_test_scaled is not None:
                X_tr, X_te = X_train_scaled, X_test_scaled
            else:
                notebook_datasets = globals().get('datasets') or {}
                numeric = notebook_datasets.get('numeric_features') or {}
                if 'X_train_scaled' in numeric and 'X_test_scaled' in numeric:
                    X_tr = numeric['X_train_scaled']
                    X_te = numeric['X_test_scaled']

        y_tr = y_train[target_label]
        y_te = y_test[target_label]

        models = self.get_numeric_models()
        models.update(self.get_dummy_models())

        results = {}
        for name, model in models.items():
            print(f"   📊 Entrenando {name}...")
            try:
                results[name] = self._fit_and_evaluate(name, model, X_tr, y_tr, X_te, y_te)
            except Exception as e:
                # One failing baseline must not stop the remaining ones.
                print(f"   ❌ Error en {name}: {e}")
                results[name] = {'error': str(e)}

        self.results[f'numeric_{target_label}'] = results
        return results

    def train_text_models(self, X_train_text, y_train, X_test_text, y_test,
                         target_label='IsToxic', max_features=5000):
        """Train and evaluate every text model with every vectorizer.

        Args:
            X_train_text, X_test_text: Documents (one string per sample).
            y_train, y_test: Label frames containing ``target_label``.
            target_label: Label column to predict.
            max_features: Vocabulary size cap for the vectorizers.

        Returns:
            Dict of ``'<model>_<vectorizer>'`` -> result dict
            (``{'error': msg}`` on failure); also stored in
            ``self.results['text_<label>']``.
        """
        print(f"📝 Entrenando modelos de texto para {target_label}...")

        y_tr = y_train[target_label]
        y_te = y_test[target_label]

        # Missing values (e.g. empty documents read back from CSV as NaN) are
        # rejected by the vectorizers; treat them as empty documents instead.
        train_docs = pd.Series(X_train_text).fillna('').astype(str)
        test_docs = pd.Series(X_test_text).fillna('').astype(str)

        results = {}

        for vec_name, vectorizer in self.get_text_vectorizers(max_features).items():
            print(f"   🔤 Usando vectorizador: {vec_name}")

            X_tr_vec = vectorizer.fit_transform(train_docs)
            X_te_vec = vectorizer.transform(test_docs)

            # Keep the fitted vectorizer for later reuse.
            self.vectorizers[f'{vec_name}_{target_label}'] = vectorizer

            # Fresh estimators per vectorizer so no fitted state is shared.
            # NOTE: cross-validation runs on the already vectorized matrix, so
            # the vocabulary has seen the validation folds; CV scores may be
            # slightly optimistic.
            for model_name, model in self.get_text_models().items():
                full_name = f"{model_name}_{vec_name}"
                print(f"      📊 Entrenando {full_name}...")
                try:
                    result = self._fit_and_evaluate(
                        full_name, model, X_tr_vec, y_tr, X_te_vec, y_te
                    )
                    result['vocab_size'] = X_tr_vec.shape[1]
                    results[full_name] = result
                except Exception as e:
                    print(f"      ❌ Error en {full_name}: {e}")
                    results[full_name] = {'error': str(e)}

        self.results[f'text_{target_label}'] = results
        return results

    def get_results_summary(self, results_dict, top_n=10):
        """Tabulate the successful results, best F1 first.

        Args:
            results_dict: Mapping of model name -> result dict.
            top_n: Maximum number of rows to keep.

        Returns:
            DataFrame with one row per model (errored models are skipped and
            missing metrics default to 0); empty when nothing succeeded.
        """
        column_keys = (
            ('Accuracy', 'accuracy'),
            ('F1', 'f1'),
            ('F1_Macro', 'f1_macro'),
            ('ROC_AUC', 'roc_auc'),
            ('CV_Mean', 'cv_mean'),
            ('CV_Std', 'cv_std'),
            ('Training_Time', 'training_time'),
        )

        summary = []
        for model_name, result in results_dict.items():
            if 'error' in result:
                continue
            row = {'Model': model_name}
            for column, key in column_keys:
                row[column] = result.get(key, 0)
            summary.append(row)

        df_summary = pd.DataFrame(summary)
        if not df_summary.empty:
            df_summary = df_summary.sort_values('F1', ascending=False).head(top_n)

        return df_summary

# Create the baseline model collection used by the experiment cells below
baseline = BaselineModels()


## Experimentos con modelos baseline

In [None]:
# Experiment 1: models trained on the numeric features
print("\n📊 EXPERIMENTO 1: CARACTERÍSTICAS NUMÉRICAS")
print("-" * 45)

# Runs only when the 'numeric_features' dataset was loaded
if 'numeric_features' in datasets:
    numeric_results = baseline.train_numeric_models(
        datasets['numeric_features']['X_train'],
        datasets['numeric_features']['y_train'],
        datasets['numeric_features']['X_test'],
        datasets['numeric_features']['y_test'],
        use_scaled=True,
        target_label='IsToxic'
    )
    
    # Show the summary table (sorted by F1 inside get_results_summary)
    numeric_summary = baseline.get_results_summary(numeric_results)
    print("\n📈 TOP MODELOS NUMÉRICOS:")
    print(numeric_summary.round(4))

In [None]:
# Experiment 2: models trained on the processed text
print("\n📝 EXPERIMENTO 2: CARACTERÍSTICAS DE TEXTO")
print("-" * 45)

# Runs only when the 'text_processed' dataset was loaded
if 'text_processed' in datasets:
    text_results = baseline.train_text_models(
        datasets['text_processed']['X_train'].iloc[:, 0],  # first column is used as the text
        datasets['text_processed']['y_train'],
        datasets['text_processed']['X_test'].iloc[:, 0],
        datasets['text_processed']['y_test'],
        target_label='IsToxic',
        max_features=5000
    )
    
    # Show the summary table (sorted by F1 inside get_results_summary)
    text_summary = baseline.get_results_summary(text_results)
    print("\n📈 TOP MODELOS DE TEXTO:")
    print(text_summary.round(4))

## Análisis y visualización de resultados

In [None]:
def plot_model_comparison(results_dict, metric='f1', title="Comparación de Modelos"):
    """Draw a horizontal bar chart ranking models by one metric.

    Results that carry an ``'error'`` key or lack ``metric`` are left out.
    When nothing remains, a message is printed and no figure is created.

    Args:
        results_dict: Mapping of model name -> result dict.
        metric: Key of the metric to plot.
        title: Figure title.
    """
    ranked = [
        (name, outcome[metric])
        for name, outcome in results_dict.items()
        if 'error' not in outcome and metric in outcome
    ]

    if not ranked:
        print(f"No hay datos para la métrica {metric}")
        return

    # Highest score first; the sort is stable, so ties keep insertion order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    models, scores = zip(*ranked)
    positions = range(len(models))

    plt.figure(figsize=(12, 8))
    palette = plt.cm.viridis(np.linspace(0, 1, len(models)))
    plt.barh(positions, scores, color=palette)

    plt.yticks(positions, models)
    plt.xlabel(metric.upper())
    plt.title(title)
    plt.grid(axis='x', alpha=0.3)

    # Write each value just past the end of its bar.
    for row, score in enumerate(scores):
        plt.text(score + 0.01, row, f'{score:.3f}',
                 va='center', fontweight='bold')

    plt.tight_layout()
    plt.show()

In [None]:
def plot_training_time_vs_performance(results_dict):
    """Scatter-plot the stored training time against F1 for every model.

    Only results without an ``'error'`` key that contain both
    ``'training_time'`` and ``'f1'`` are drawn. When none qualify, a message
    is printed and no figure is created.

    Args:
        results_dict: Mapping of model name -> result dict.
    """
    usable = {
        name: outcome
        for name, outcome in results_dict.items()
        if 'error' not in outcome and 'training_time' in outcome and 'f1' in outcome
    }

    if not usable:
        print("No hay datos de tiempo de entrenamiento")
        return

    models = list(usable)
    times = [usable[name]['training_time'] for name in models]
    f1_scores = [usable[name]['f1'] for name in models]

    plt.figure(figsize=(10, 8))
    plt.scatter(times, f1_scores, s=100, alpha=0.7, c=range(len(models)), cmap='viridis')

    # Label every point with its model name.
    for model, elapsed, score in zip(models, times, f1_scores):
        plt.annotate(model, (elapsed, score),
                     xytext=(5, 5), textcoords='offset points',
                     fontsize=8, rotation=15)

    plt.xlabel('Tiempo de Entrenamiento (segundos)')
    plt.ylabel('F1-Score')
    plt.title('Eficiencia vs Performance de Modelos')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

In [None]:
# Visualizations
print("\n📊 VISUALIZACIÓN DE RESULTADOS")
print("-" * 35)

# Compare the numeric models (only if that experiment stored results)
if 'numeric_IsToxic' in baseline.results:
    plot_model_comparison(
        baseline.results['numeric_IsToxic'], 
        metric='f1',
        title="Comparación Modelos Numéricos - F1 Score"
    )

# Compare the text models (only if that experiment stored results)
if 'text_IsToxic' in baseline.results:
    plot_model_comparison(
        baseline.results['text_IsToxic'], 
        metric='f1',
        title="Comparación Modelos de Texto - F1 Score"
    )

# ROC curves
print("\n📈 CURVAS ROC")
if 'numeric_IsToxic' in baseline.results:
    evaluator.plot_roc_curves(baseline.results['numeric_IsToxic'], 'IsToxic - Modelos Numéricos')

if 'text_IsToxic' in baseline.results:
    # Collect the text models that succeeded and have ROC points
    text_results = baseline.results['text_IsToxic']
    top_models = {}
    for model_name, result in text_results.items():
        if 'error' not in result and 'fpr' in result:
            top_models[model_name] = result
    
    # With more than five candidates keep only the five best by F1 so the
    # chart stays readable; with five or fewer all of them are plotted
    if len(top_models) > 5:
        sorted_models = sorted(top_models.items(), 
                             key=lambda x: x[1].get('f1', 0), reverse=True)
        top_models = dict(sorted_models[:5])
    
    evaluator.plot_roc_curves(top_models, 'IsToxic - Modelos de Texto (Top 5)')

## Análisis detallado del mejor modelo

In [None]:
def _format_metric(value, spec='.4f'):
    """Format a numeric metric; return 'N/A' when it is missing or not numeric."""
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return 'N/A'


def analyze_best_model(results_dict, model_name=None):
    """Print and plot a detailed report for one model.

    Args:
        results_dict: Mapping of model name -> result dict.
        model_name: Model to analyse. When ``None`` the successful model with
            the highest F1 is chosen (first one wins ties), even if that best
            F1 is 0.

    The function returns ``None``. Missing metrics are shown as ``N/A``, and
    a model whose result holds an ``'error'`` entry is reported as failed.
    The confusion-matrix plot uses the notebook-level ``evaluator``.
    """
    if model_name is None:
        successful = {
            name: outcome
            for name, outcome in results_dict.items()
            if 'error' not in outcome
        }
        if successful:
            model_name = max(successful, key=lambda name: successful[name].get('f1', 0))

    if model_name not in results_dict:
        print(f"Modelo {model_name} no encontrado")
        return

    result = results_dict[model_name]

    if 'error' in result:
        # Training failed for this model, so there are no metrics to report.
        print(f"❌ El modelo {model_name} no tiene resultados: {result['error']}")
        return

    print(f"\n🏆 ANÁLISIS DETALLADO: {model_name.upper()}")
    print("=" * 60)

    # Main metrics
    print("📊 MÉTRICAS PRINCIPALES:")
    print(f"  • Accuracy: {_format_metric(result.get('accuracy'))}")
    print(f"  • F1-score: {_format_metric(result.get('f1'))}")
    print(f"  • F1 Macro: {_format_metric(result.get('f1_macro'))}")
    print(f"  • ROC AUC: {_format_metric(result.get('roc_auc'))}")
    print(f"  • CV Mean: {_format_metric(result.get('cv_mean'))}")
    print(f"  • Tiempo de entrenamiento: {result.get('training_time', 0):.2f} seg")

    # Confusion matrix
    print("\n📉 MATRIZ DE CONFUSIÓN:")
    cm = result.get('confusion_matrix')
    if cm is not None:
        evaluator.plot_confusion_matrix(cm, title=f"Matriz de Confusión - {model_name}")

    # Classification report
    print("\n📄 REPORTE DE CLASIFICACIÓN:")
    report = result.get('classification_report')
    if report:
        df_report = pd.DataFrame(report).transpose()
        try:
            display(df_report.style.background_gradient(
                cmap='RdYlGn', subset=['precision', 'recall', 'f1-score']))
        except NameError:
            # ``display`` is only injected by IPython; fall back to plain text.
            print(df_report)

    # Individual ROC curve
    print("\n📈 CURVA ROC:")
    if 'fpr' in result and 'tpr' in result:
        auc_text = _format_metric(result.get('roc_auc'), '.3f')
        plt.figure(figsize=(8, 6))
        plt.plot(result['fpr'], result['tpr'], label=f'{model_name} (AUC = {auc_text})')
        plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'Curva ROC - {model_name}')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
    else:
        print("No se encontró información de curva ROC.")

In [None]:
# Run the detailed analysis for the best text model (if that experiment ran)
print("\n🔍 ANÁLISIS DETALLADO DEL MEJOR MODELO DE TEXTO")
if 'text_IsToxic' in baseline.results:
    analyze_best_model(baseline.results['text_IsToxic'])

# Run the detailed analysis for the best numeric model (if that experiment ran)
print("\n🔍 ANÁLISIS DETALLADO DEL MEJOR MODELO NUMÉRICO")
if 'numeric_IsToxic' in baseline.results:
    analyze_best_model(baseline.results['numeric_IsToxic'])