In [123]:
"""
Script de entrenamiento BERT + MLflow para clasificaci√≥n de hate speech
Dataset: 997 muestras, ~46% hate
Objetivo: F1/Precision/Recall aceptables, overfitting < 5%
"""

import os
import random
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import (
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix
)

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
)

import mlflow
import mlflow.pytorch

warnings.filterwarnings('ignore')


In [124]:

# ============================================================================
# 1. CONFIGURACI√ìN Y PATHS
# ============================================================================

def setup_paths():
    """Configura los paths del proyecto"""
    current_dir = Path.cwd()
    
    # Si estamos en notebooks, subir un nivel
    if "notebooks" in str(current_dir):
        project_root = current_dir.parent
    else:
        project_root = current_dir
    
    # Crear carpeta mlruns si no existe
    mlruns_dir = project_root / "mlruns"
    mlruns_dir.mkdir(exist_ok=True)
    
    # Path directo a data/processed
    data_dir = project_root / "data" / "processed"
    
    return project_root, mlruns_dir, data_dir


def set_seed(seed=42):
    """Fija todas las semillas para reproducibilidad"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [125]:
# ============================================================================
# 2. DATA AUGMENTATION PARA CLASE MINORITARIA
# ============================================================================

def synonym_replacement(text, n=2):
    """
    Augmentaci√≥n simple: intercambio aleatorio de palabras
    (En producci√≥n usar nlpaug o backtranslation)
    """
    words = text.split()
    if len(words) < 3:
        return text
    
    # Intercambiar n palabras aleatorias
    for _ in range(min(n, len(words) // 3)):
        idx1, idx2 = random.sample(range(len(words)), 2)
        words[idx1], words[idx2] = words[idx2], words[idx1]
    
    return " ".join(words)


def augment_minority_class(X, y, target_class=1, augment_factor=0.3):
    """
    Aumenta la clase minoritaria con augmentaci√≥n ligera
    
    Args:
        X: textos originales
        y: labels
        target_class: clase a aumentar (1 = hate)
        augment_factor: porcentaje de nuevas muestras (0.3 = +30%)
    
    Returns:
        X_aug, y_aug: datos aumentados
    """
    minority_mask = y == target_class
    minority_X = X[minority_mask]
    minority_y = y[minority_mask]
    
    # Calcular cu√°ntas muestras crear
    n_samples = int(len(minority_X) * augment_factor)
    
    # Muestreo aleatorio con reemplazo
    indices = np.random.choice(len(minority_X), size=n_samples, replace=True)
    
    aug_texts = []
    for idx in indices:
        # Aplicar augmentaci√≥n
        original_text = minority_X[idx]
        aug_text = synonym_replacement(original_text, n=2)
        aug_texts.append(aug_text)
    
    # Combinar originales + aumentados
    X_aug = np.concatenate([X, np.array(aug_texts)])
    y_aug = np.concatenate([y, np.full(n_samples, target_class)])
    
    return X_aug, y_aug



In [126]:
# ============================================================================
# 3. DATASET Y DATALOADER
# ============================================================================

class HateSpeechDataset(Dataset):
    """Dataset personalizado para clasificaci√≥n de hate speech"""
    
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = int(self.labels[idx])
        
        # Tokenizaci√≥n
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(label, dtype=torch.long)
        }



In [127]:
# ============================================================================
# 4. CARGA Y PREPARACI√ìN DE DATOS
# ============================================================================

def load_and_prepare_data(data_dir, use_augmentation=True):
    """
    Carga el dataset y lo prepara para entrenamiento
    
    Args:
        data_dir: directorio con los archivos pickle
        use_augmentation: si aplicar augmentaci√≥n a clase minoritaria
    
    Returns:
        X_train, X_val, X_test, y_train, y_val, y_test
    """
    # Cargar pickle directamente
    pickle_path = data_dir / "youtube_all_versions.pkl"
    
    print(f"üìÇ Cargando datos desde: {pickle_path}")
    df = pd.read_pickle(pickle_path)
    
    # Si es un dict, extraer DataFrame
    if isinstance(df, dict):
        print(f"üì¶ Pickle es diccionario. Claves: {list(df.keys())}")
        # Usar la primera clave que sea DataFrame
        for key, value in df.items():
            if isinstance(value, pd.DataFrame):
                df = value
                print(f"‚úÖ Usando DataFrame de clave: '{key}'")
                break
    
    print(f"‚úÖ Columnas disponibles: {df.columns.tolist()}")
    
    # Buscar columna de texto
    text_col = None
    for col in ['Text_Lemmatized', 'Text_Cleaned', 'Text_Normalized', 'Text']:
        if col in df.columns:
            text_col = col
            break
    
    print(f"‚úÖ Usando columna: {text_col}")
    
    # Filtrar nulos
    df = df.dropna(subset=[text_col, 'IsHate'])
    
    X = df[text_col].astype(str).values
    y = df['IsHate'].astype(int).values
    
    print(f"\nüìä Dataset: {len(X)} muestras")
    print(f"   - Hate: {np.sum(y)} ({np.mean(y)*100:.1f}%)")
    print(f"   - Normal: {len(y) - np.sum(y)} ({(1-np.mean(y))*100:.1f}%)")
    
    # Split estratificado: 70% train, 15% val, 15% test
    X_temp, X_test, y_temp, y_test = train_test_split(
        X, y, test_size=0.15, random_state=42, stratify=y
    )
    
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=0.176, random_state=42, stratify=y_temp
    )
    
    print(f"\nüìà Split:")
    print(f"   Train: {len(X_train)} ({len(X_train)/len(X)*100:.1f}%)")
    print(f"   Val:   {len(X_val)} ({len(X_val)/len(X)*100:.1f}%)")
    print(f"   Test:  {len(X_test)} ({len(X_test)/len(X)*100:.1f}%)")
    
    # Augmentaci√≥n
    if use_augmentation:
        print("\nüîÑ Aplicando augmentaci√≥n...")
        X_train, y_train = augment_minority_class(
            X_train, y_train, 
            target_class=1,
            augment_factor=0.3
        )
        print(f"   Train aumentado: {len(X_train)} muestras")
    
    return X_train, X_val, X_test, y_train, y_val, y_test


In [128]:
# %%
# ============================================================================
# 5. FUNCI√ìN DE ENTRENAMIENTO CON CLASS WEIGHT
# ============================================================================

def calculate_class_weights(y_train, device):
    """Calcula pesos de clase balanceados"""
    from sklearn.utils.class_weight import compute_class_weight
    
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=np.array([0, 1]),
        y=y_train
    )
    return torch.FloatTensor(class_weights).to(device)


def train_epoch(model, data_loader, optimizer, scheduler, device, class_weights=None):
    """Entrena el modelo por una √©poca con class weights"""
    model.train()
    losses = []
    
    for batch in data_loader:
        # Mover batch a device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        # Forward pass
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        
        # Calcular loss con class weights si se proporcionan
        if class_weights is not None:
            logits = outputs.logits
            loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights)
            loss = loss_fct(logits, labels)
        else:
            loss = outputs.loss
        
        losses.append(loss.item())
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        scheduler.step()
    
    return np.mean(losses)


In [129]:
# %%
# ============================================================================
# 6. PIPELINE COMPLETO DE ENTRENAMIENTO CON FROZEN LAYERS Y CLASS WEIGHTS
# ============================================================================

from sklearn.utils.class_weight import compute_class_weight

def calculate_class_weights(y_train, device):
    """Calcula pesos de clase balanceados"""
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=np.array([0, 1]),
        y=y_train
    )
    return torch.FloatTensor(class_weights).to(device)


def train_bert_model(
    X_train, y_train,
    X_val, y_val,
    model_name='distilbert-base-uncased',
    max_len=128,
    batch_size=16,
    epochs=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    device=None,
    freeze_base=False
):
    """
    Pipeline completo de entrenamiento BERT con class weights y frozen layers
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    print(f"\nüöÄ Iniciando entrenamiento en: {device}")
    print(f"   Modelo: {model_name}")
    print(f"   Epochs: {epochs}, Batch size: {batch_size}, Max len: {max_len}")
    print(f"   Freeze base: {freeze_base}")
    
    # 1. Tokenizer
    print("\nüìù Cargando tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    # 2. Datasets
    train_dataset = HateSpeechDataset(X_train, y_train, tokenizer, max_len)
    val_dataset = HateSpeechDataset(X_val, y_val, tokenizer, max_len)
    
    # 3. DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    
    # 4. Modelo (compatible con BERT/DistilBERT/TinyBERT)
    print("üß† Cargando modelo...")
    try:
        # Intenta primero configuraci√≥n BERT/DistilBERT
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2,
            dropout=0.3
        ).to(device)
    except:
        # Fallback: configuraci√≥n b√°sica (TinyBERT)
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2
        ).to(device)

    
    # 5. CONGELAR CAPAS BASE si freeze_base=True
    if freeze_base:
        print("üîí Congelando capas base del transformer...")
        for name, param in model.named_parameters():
            if 'classifier' not in name and 'pre_classifier' not in name:
                param.requires_grad = False
        
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in model.parameters())
        print(f"   Par√°metros entrenables: {trainable:,} / {total:,} ({trainable/total*100:.1f}%)")
    
    # 6. Class weights para balancear clases
    class_weights = calculate_class_weights(y_train, device)
    print(f"‚öñÔ∏è  Class weights: Normal={class_weights[0]:.3f}, Hate={class_weights[1]:.3f}")
    
    # 7. Optimizer y Scheduler
    optimizer = torch.optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learning_rate,
        weight_decay=weight_decay
    )
    
    total_steps = len(train_loader) * epochs
    warmup_steps = int(0.1 * total_steps)
    
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=total_steps
    )
    
    # 8. Loss function con class weights
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)
    
    # 9. Training loop con early stopping
    print("\n‚è≥ Entrenando modelo...\n")
    
    best_f1 = 0.0
    best_epoch = 0
    best_model_state = None
    history = []
    patience = 1
    patience_counter = 0
    
    for epoch in range(epochs):
        print(f"{'='*60}")
        print(f"Epoch {epoch + 1}/{epochs}")
        print(f"{'='*60}")
        
        # TRAIN
        model.train()
        train_losses = []
        
        for batch in train_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            # Forward pass
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=None
            )
            
            # Loss con class weights
            loss = criterion(outputs.logits, labels)
            train_losses.append(loss.item())
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
        
        train_loss = np.mean(train_losses)
        
        # EVALUATE
        val_metrics = eval_model(model, val_loader, device)
        
        # Log
        history.append({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'val_loss': val_metrics['loss'],
            'val_precision': val_metrics['precision'],
            'val_recall': val_metrics['recall'],
            'val_f1': val_metrics['f1']
        })
        
        # Calcular overfitting
        overfitting_gap = abs(train_loss - val_metrics['loss'])
        overfitting_pct = (overfitting_gap / train_loss) * 100
        
        print(f"   Train Loss:     {train_loss:.4f}")
        print(f"   Val Loss:       {val_metrics['loss']:.4f}")
        print(f"   Val Precision:  {val_metrics['precision']:.4f}")
        print(f"   Val Recall:     {val_metrics['recall']:.4f}")
        print(f"   Val F1:         {val_metrics['f1']:.4f}")
        print(f"   Overfitting:    {overfitting_pct:.2f}% {'‚úÖ' if overfitting_pct < 5 else '‚ö†Ô∏è' if overfitting_pct < 15 else '‚ùå'}")
        
        # Guardar mejor modelo
        if val_metrics['f1'] > best_f1:
            best_f1 = val_metrics['f1']
            best_epoch = epoch + 1
            best_model_state = model.state_dict().copy()
            patience_counter = 0
            print(f"   üéØ Nuevo mejor F1: {best_f1:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"\n‚èπÔ∏è  Early stopping en epoch {epoch + 1}")
                break
        
        print()
    
    # Cargar mejor modelo
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"\n‚úÖ Cargado mejor modelo (Epoch {best_epoch}, F1: {best_f1:.4f})")
    
    return model, tokenizer, history


In [130]:
# ============================================================================
# 7. EVALUACI√ìN FINAL EN TEST
# ============================================================================

def evaluate_test_set(model, tokenizer, X_test, y_test, device, max_len=128):
    """Eval√∫a el modelo en el conjunto de test"""
    test_dataset = HateSpeechDataset(X_test, y_test, tokenizer, max_len)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    
    print("\n" + "="*60)
    print("üìä EVALUACI√ìN EN TEST SET")
    print("="*60)
    
    metrics = eval_model(model, test_loader, device)
    
    print(f"\n   Test Loss:      {metrics['loss']:.4f}")
    print(f"   Test Precision: {metrics['precision']:.4f}")
    print(f"   Test Recall:    {metrics['recall']:.4f}")
    print(f"   Test F1:        {metrics['f1']:.4f}")
    
    # Matriz de confusi√≥n
    print("\nüìà Confusion Matrix:")
    cm = confusion_matrix(metrics['true_labels'], metrics['predictions'])
    print(cm)
    
    # Reporte de clasificaci√≥n
    print("\nüìã Classification Report:")
    print(classification_report(
        metrics['true_labels'], 
        metrics['predictions'],
        target_names=['Normal', 'Hate']
    ))
    
    return metrics



In [131]:
# ============================================================================
# 8. INTEGRACI√ìN CON MLFLOW (CORREGIDO PARA WINDOWS)
# ============================================================================

def run_experiment_with_mlflow(
    project_root,
    mlruns_dir,
    data_dir,
    experiment_name="youtube_hate_speech_bert"
):
    """
    Pipeline completo con tracking en MLflow
    
    Args:
        project_root: directorio ra√≠z del proyecto
        mlruns_dir: directorio para mlruns
        data_dir: directorio con los datos
        experiment_name: nombre del experimento MLflow
    """
    # Configurar MLflow - FIX DEFINITIVO PARA WINDOWS
    # Convertir Path de Windows a URI v√°lida
    tracking_uri = mlruns_dir.as_uri()
    mlflow.set_tracking_uri(tracking_uri)
    
    print(f"üîß MLflow Tracking URI: {tracking_uri}")
    
    mlflow.set_experiment(experiment_name)
    
    # Fijar semilla
    set_seed(42)
    
    # Device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Cargar datos
    X_train, X_val, X_test, y_train, y_val, y_test = load_and_prepare_data(
        data_dir, 
        use_augmentation=True
    )
    
    # Hiperpar√°metros
    # Hiperpar√°metros optimizados para dataset peque√±o con DistilBERT
    # Hiperpar√°metros con TinyBERT (modelo m√°s peque√±o)
    # Hiperpar√°metros CORREGIDOS para TinyBERT
    # Hiperpar√°metros balanceados con DistilBERT
    # Hiperpar√°metros TinyBERT con class_weights
    # Hiperpar√°metros ULTRA conservadores
    params = {
        'model_name': 'huawei-noah/TinyBERT_General_4L_312D',
        'max_len': 128,
        'batch_size': 8,          # M√°s peque√±o
        'epochs': 2,              # Solo 2 epochs (viste que epoch 3-4 explotan)
        'learning_rate': 2e-5,    # LR muy bajo
        'weight_decay': 0.05,     # Regularizaci√≥n m√°xima
        'use_augmentation': True,
        'augment_factor': 0.2,    # M√≠nima augmentaci√≥n
        'seed': 42,
        'freeze_base': False
    }



    # Iniciar run de MLflow
    with mlflow.start_run(run_name=f"bert_{params['max_len']}_{params['epochs']}ep"):
        
        # Log de par√°metros
        mlflow.log_params(params)
        
        # Entrenar modelo
        model, tokenizer, history = train_bert_model(
            X_train, y_train,
            X_val, y_val,
            model_name=params['model_name'],
            max_len=params['max_len'],
            batch_size=params['batch_size'],
            epochs=params['epochs'],
            learning_rate=params['learning_rate'],
            weight_decay=params['weight_decay'],
            device=device
        )
        
        # Log de m√©tricas de validaci√≥n por √©poca
        for epoch_metrics in history:
            mlflow.log_metrics({
                'train_loss': epoch_metrics['train_loss'],
                'val_loss': epoch_metrics['val_loss'],
                'val_precision': epoch_metrics['val_precision'],
                'val_recall': epoch_metrics['val_recall'],
                'val_f1': epoch_metrics['val_f1']
            }, step=epoch_metrics['epoch'])
        
        # Evaluaci√≥n en test
        test_metrics = evaluate_test_set(
            model, tokenizer, X_test, y_test, device, params['max_len']
        )
        
        # Log de m√©tricas finales
        mlflow.log_metrics({
            'test_loss': test_metrics['loss'],
            'test_precision': test_metrics['precision'],
            'test_recall': test_metrics['recall'],
            'test_f1': test_metrics['f1']
        })
        
        # Calcular overfitting final
        final_train_loss = history[-1]['train_loss']
        final_val_loss = history[-1]['val_loss']
        overfitting_pct = abs(final_train_loss - final_val_loss) / final_train_loss * 100
        mlflow.log_metric('overfitting_percentage', overfitting_pct)
        
        # Guardar tokenizer
        tokenizer_dir = project_root / "models" / "tokenizer"
        tokenizer_dir.mkdir(parents=True, exist_ok=True)
        tokenizer.save_pretrained(tokenizer_dir)
        mlflow.log_artifacts(str(tokenizer_dir), artifact_path="tokenizer")
        
        # Guardar modelo en MLflow (sin registro en model registry para evitar problemas)
        mlflow.pytorch.log_model(
            model,
            artifact_path="model"
            # Comentamos registered_model_name para evitar problemas con model registry
            # registered_model_name="bert_hate_speech"
        )
        
        print("\n" + "="*60)
        print("‚úÖ EXPERIMENTO COMPLETADO")
        print("="*60)
        print(f"   MLflow Tracking URI: {mlflow.get_tracking_uri()}")
        print(f"   Experiment ID: {mlflow.get_experiment_by_name(experiment_name).experiment_id}")
        print(f"   Run ID: {mlflow.active_run().info.run_id}")
        print(f"\n   üìä M√©tricas finales:")
        print(f"      Test F1: {test_metrics['f1']:.4f}")
        print(f"      Overfitting: {overfitting_pct:.2f}%")
        print(f"\n   üíæ Artefactos guardados en: {mlruns_dir}")


In [132]:
# ============================================================================
# 9. MAIN
# ============================================================================

def main():
    """Funci√≥n principal"""
    print("="*60)
    print("ü§ñ ENTRENAMIENTO DESTILBERT - HATE SPEECH DETECTION")
    print("="*60)
    
    # Setup
    project_root, mlruns_dir, data_dir = setup_paths()
    
    print(f"\nüìÅ Configuraci√≥n de paths:")
    print(f"   Project root: {project_root}")
    print(f"   MLflow dir:   {mlruns_dir}")
    print(f"   Data dir:     {data_dir}")
    
    # Ejecutar experimento
    run_experiment_with_mlflow(
        project_root=project_root,
        mlruns_dir=mlruns_dir,
        data_dir=data_dir,
        experiment_name="youtube_hate_speech_bert"
    )
    
    print("\n‚ú® Entrenamiento finalizado con √©xito!")
    print(f"\nüí° Para ver resultados en MLflow UI:")
    print(f"   cd {project_root}")
    print(f"   mlflow ui --backend-store-uri file://{mlruns_dir}")
    print(f"   Abrir: http://localhost:5000")


if __name__ == "__main__":
    main()

ü§ñ ENTRENAMIENTO DESTILBERT - HATE SPEECH DETECTION

üìÅ Configuraci√≥n de paths:
   Project root: c:\Users\Administrator\Desktop\NLP\Proyecto_X_NLP_Equipo3
   MLflow dir:   c:\Users\Administrator\Desktop\NLP\Proyecto_X_NLP_Equipo3\mlruns
   Data dir:     c:\Users\Administrator\Desktop\NLP\Proyecto_X_NLP_Equipo3\data\processed
üîß MLflow Tracking URI: file:///c:/Users/Administrator/Desktop/NLP/Proyecto_X_NLP_Equipo3/mlruns
üìÇ Cargando datos desde: c:\Users\Administrator\Desktop\NLP\Proyecto_X_NLP_Equipo3\data\processed\youtube_all_versions.pkl
‚úÖ Columnas disponibles: ['CommentId', 'VideoId', 'Text', 'IsToxic', 'IsAbusive', 'IsThreat', 'IsProvocative', 'IsObscene', 'IsHatespeech', 'IsRacist', 'IsNationalist', 'IsSexist', 'IsHomophobic', 'IsReligiousHate', 'IsRadicalism', 'IsHate', 'num_labels', 'char_count', 'word_count', 'sentence_count', 'avg_word_length', 'uppercase_count', 'uppercase_ratio', 'exclamation_count', 'question_count', 'dots_count', 'emoji_count', 'url_count', 'me

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚öñÔ∏è  Class weights: Normal=1.055, Hate=0.951

‚è≥ Entrenando modelo...

Epoch 1/2
   Train Loss:     0.6806
   Val Loss:       0.6571
   Val Precision:  0.7667
   Val Recall:     0.3333
   Val F1:         0.4646
   Overfitting:    3.46% ‚úÖ
   üéØ Nuevo mejor F1: 0.4646

Epoch 2/2
   Train Loss:     0.6003
   Val Loss:       0.6229
   Val Precision:  0.6076
   Val Recall:     0.6957
   Val F1:         0.6486
   Overfitting:    3.77% ‚úÖ
   üéØ Nuevo mejor F1: 0.6486


‚úÖ Cargado mejor modelo (Epoch 2, F1: 0.6486)

üìä EVALUACI√ìN EN TEST SET

   Test Loss:      0.6082
   Test Precision: 0.6375
   Test Recall:    0.7391
   Test F1:        0.6846

üìà Confusion Matrix:
[[52 29]
 [18 51]]

üìã Classification Report:
              precision    recall  f1-score   support

      Normal       0.74      0.64      0.69        81
        Hate       0.64      0.74      0.68        69

    accuracy                           0.69       150
   macro avg       0.69      0.69      0.69       




‚úÖ EXPERIMENTO COMPLETADO
   MLflow Tracking URI: file:///c:/Users/Administrator/Desktop/NLP/Proyecto_X_NLP_Equipo3/mlruns
   Experiment ID: 718288864077088622
   Run ID: 9d4185ab6f664c00b174c9a04856592f

   üìä M√©tricas finales:
      Test F1: 0.6846
      Overfitting: 3.77%

   üíæ Artefactos guardados en: c:\Users\Administrator\Desktop\NLP\Proyecto_X_NLP_Equipo3\mlruns

‚ú® Entrenamiento finalizado con √©xito!

üí° Para ver resultados en MLflow UI:
   cd c:\Users\Administrator\Desktop\NLP\Proyecto_X_NLP_Equipo3
   mlflow ui --backend-store-uri file://c:\Users\Administrator\Desktop\NLP\Proyecto_X_NLP_Equipo3\mlruns
   Abrir: http://localhost:5000
