# Entrenamiento y Evaluación de Modelos
## Proyecto: Clasificación de Riesgo Crediticio

### Objetivos de esta fase:
1. **Selección de Características**: Aplicar técnicas de reducción dimensional
2. **Entrenamiento de Modelos**: Implementar 3 algoritmos desde cero
3. **Evaluación Exhaustiva**: Métricas completas y validación cruzada  
4. **Optimización**: Ajuste de hiperparámetros para mejor rendimiento
5. **Persistencia**: Guardar modelos entrenados para uso futuro

## 0. Setup y Carga de Datos

In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import json
from itertools import product
import time

# Notebook-wide display settings: silence library warnings and set the
# default figure size and font size for every plot below.
warnings.filterwarnings('ignore')
plt.rcParams.update({'figure.figsize': (12, 8), 'font.size': 10})

# Treat the parent of the current working directory as the project root and
# put <root>/src first on sys.path so the project-local imports below resolve.
project_root = os.path.abspath('..')
src_dir = os.path.join(project_root, 'src')
sys.path.insert(0, src_dir)

from models import LogisticRegressionMulticlass, SVMMulticlass, RandomForestMulticlass
from evaluation.metrics import (
    ModelEvaluator, FeatureSelector, DimensionalityReducer, ModelPersistence,
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
)

print("Configuración completada exitosamente")

In [None]:
# Input directory with the preprocessed data and output directory for this
# notebook's artifacts (created if it does not exist yet).
processed_dir = os.path.join(project_root, 'data', 'processed')
experiments_dir = os.path.join(project_root, 'experiments')
os.makedirs(experiments_dir, exist_ok=True)


def _processed_path(filename):
    """Return the path of *filename* inside the processed-data directory."""
    return os.path.join(processed_dir, filename)


print("CARGA DE DATOS PREPROCESADOS")
print("="*50)

# Feature matrices stay as DataFrames; the target is reduced to the values
# of its 'nivel_riesgo_encoded' column.
X_train = pd.read_csv(_processed_path('X_train_processed.csv'))
X_test = pd.read_csv(_processed_path('X_test_processed.csv'))
y_train_frame = pd.read_csv(_processed_path('y_train_processed.csv'))
y_train = y_train_frame['nivel_riesgo_encoded'].values

# Preprocessing metadata (JSON document)
with open(_processed_path('preprocessing_metadata.json'), 'r') as metadata_file:
    metadata = json.load(metadata_file)

# Feature names, one per line
with open(_processed_path('feature_names.txt'), 'r') as names_file:
    feature_names = names_file.read().strip().split('\n')

# Quick sanity summary of what was loaded
print(f"X_train: {X_train.shape}")
print(f"X_test: {X_test.shape}")
print(f"y_train: {y_train.shape}")
print(f"Features: {len(feature_names)}")
print(f"Clases: {metadata['target_classes']}")
print(f"Distribución: {np.bincount(y_train)}")

## 1. Análisis y Selección de Características

### 1.1 Análisis de Correlación entre Features

In [None]:
print("ANÁLISIS DE CORRELACIÓN")
print("="*50)

# Ask the selector which column positions to keep and which to drop, using
# a 0.95 threshold.
corr_filter_result = FeatureSelector.correlation_filter(X_train.values, threshold=0.95)
features_to_keep_corr, features_to_remove_corr = corr_filter_result

print(f"Features originales: {X_train.shape[1]}")
print(f"Features a mantener (correlación): {len(features_to_keep_corr)}")
print(f"Features removidas (correlación): {len(features_to_remove_corr)}")

# List the dropped columns by name, if there are any
if features_to_remove_corr:
    print("\nFeatures removidas por alta correlación:")
    for removed_idx in features_to_remove_corr:
        print(f"  {feature_names[removed_idx]}")

# Apply the same positional selection to train, test and the name list
X_train_corr, X_test_corr = (
    frame.iloc[:, features_to_keep_corr] for frame in (X_train, X_test)
)
feature_names_corr = [feature_names[kept_idx] for kept_idx in features_to_keep_corr]

print(f"\nDimensiones después de filtro de correlación:")
print(f"X_train: {X_train_corr.shape} | X_test: {X_test_corr.shape}")

### 1.2 Selección Univariada con F-test

In [None]:
print("SELECCIÓN UNIVARIADA (F-TEST)")
print("="*50)

# Keep at most 20 features (fewer when the filtered matrix is narrower)
k_best = min(20, X_train_corr.shape[1])
selected_features, f_scores = FeatureSelector.univariate_selection(
    X_train_corr.values, y_train, k_best=k_best
)

print(f"Top {k_best} features seleccionadas:")

# Pair every selected feature name with its score, highest score first
feature_importance_pairs = sorted(
    ((feature_names_corr[col], f_scores[col]) for col in selected_features),
    key=lambda pair: pair[1],
    reverse=True,
)

for rank, (feature, score) in enumerate(feature_importance_pairs, start=1):
    print(f"{rank:2d}. {feature[:35]:35} | F-score: {score:.3f}")

# Apply the selection to train, test and the name list
X_train_selected, X_test_selected = (
    frame.iloc[:, selected_features] for frame in (X_train_corr, X_test_corr)
)
feature_names_selected = [feature_names_corr[col] for col in selected_features]

print(f"\nDimensiones finales después de selección:")
print(f"X_train: {X_train_selected.shape} | X_test: {X_test_selected.shape}")

### 1.3 Reducción Dimensional con PCA

In [None]:
print("ANÁLISIS DE COMPONENTES PRINCIPALES (PCA)")
print("="*50)

# Fit the reducer on the selected training features (0.95 explained-variance
# threshold) and project the test set with the same fitted reducer.
pca = DimensionalityReducer(explained_variance_threshold=0.95)
X_train_pca = pca.fit_transform(X_train_selected.values)
X_test_pca = pca.transform(X_test_selected.values)

print(f"Componentes principales seleccionados: {pca.n_components_selected}")
print(f"Varianza explicada acumulada: {np.sum(pca.explained_variance_ratio_):.3f}")
print(f"Dimensiones PCA - Train: {X_train_pca.shape} | Test: {X_test_pca.shape}")

# Two side-by-side panels: per-component variance and its running total
fig, (ax_each, ax_cum) = plt.subplots(1, 2, figsize=(12, 5))

variance_ratio = pca.explained_variance_ratio_
component_numbers = range(1, len(variance_ratio) + 1)
ax_each.plot(component_numbers, variance_ratio, 'bo-', markersize=6)
ax_each.set_xlabel('Componente Principal')
ax_each.set_ylabel('Varianza Explicada')
ax_each.set_title('Varianza Explicada por Componente')
ax_each.grid(True, alpha=0.3)

cumsum_variance = np.cumsum(variance_ratio)
ax_cum.plot(range(1, len(cumsum_variance) + 1), cumsum_variance, 'ro-', markersize=6)
# Reference line at the 0.95 level
ax_cum.axhline(y=0.95, color='g', linestyle='--', alpha=0.7, label='95% varianza')
ax_cum.set_xlabel('Número de Componentes')
ax_cum.set_ylabel('Varianza Explicada Acumulada')
ax_cum.set_title('Varianza Explicada Acumulada')
ax_cum.legend()
ax_cum.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Numeric detail for the leading components (up to five)
print(f"\nPrimeras 5 componentes - varianza explicada:")
for pc_number, var_exp in enumerate(pca.explained_variance_ratio_[:5], start=1):
    print(f"  PC{pc_number}: {var_exp:.4f} ({var_exp*100:.2f}%)")

## 2. Entrenamiento de Modelos

### 2.1 Configuración de Experimentos

In [None]:
print("CONFIGURACIÓN DE EXPERIMENTOS")
print("="*50)

# Each dataset variant is a (train matrix, test matrix, feature names) triple.
pca_component_names = [f'PC{i+1}' for i in range(X_train_pca.shape[1])]
datasets = {
    'original': (X_train.values, X_test.values, feature_names),
    'correlation_filtered': (X_train_corr.values, X_test_corr.values, feature_names_corr),
    'feature_selected': (X_train_selected.values, X_test_selected.values, feature_names_selected),
    'pca_transformed': (X_train_pca, X_test_pca, pca_component_names),
}

# Named hyperparameter sets per model family; each inner dict is passed to
# the model constructor as keyword arguments.
logistic_grid = {
    'base': {'learning_rate': 0.01, 'max_iterations': 1000, 'regularization': 'l2', 'lambda_reg': 0.01},
    'l1_reg': {'learning_rate': 0.01, 'max_iterations': 1000, 'regularization': 'l1', 'lambda_reg': 0.01},
    'high_lr': {'learning_rate': 0.1, 'max_iterations': 500, 'regularization': 'l2', 'lambda_reg': 0.001},
}
svm_grid = {
    'base': {'C': 1.0, 'kernel': 'linear', 'learning_rate': 0.001, 'max_iterations': 1000},
    'high_c': {'C': 10.0, 'kernel': 'linear', 'learning_rate': 0.001, 'max_iterations': 1000},
    'rbf_kernel': {'C': 1.0, 'kernel': 'rbf', 'learning_rate': 0.001, 'max_iterations': 800},
}
forest_grid = {
    'base': {'n_estimators': 100, 'max_depth': 10, 'max_features': 'sqrt'},
    'deep_trees': {'n_estimators': 50, 'max_depth': 20, 'max_features': 'sqrt'},
    'many_trees': {'n_estimators': 200, 'max_depth': 8, 'max_features': 'log2'},
}

model_configs = {
    'LogisticRegression': {'class': LogisticRegressionMulticlass, 'params': logistic_grid},
    'SVM': {'class': SVMMulticlass, 'params': svm_grid},
    'RandomForest': {'class': RandomForestMulticlass, 'params': forest_grid},
}

print(f"Datasets para experimentar: {list(datasets.keys())}")
print(f"Modelos configurados: {list(model_configs.keys())}")

# One line per dataset variant: rows × columns of its training matrix
for dataset_name, (X_tr, _X_te, _names) in datasets.items():
    n_rows, n_cols = X_tr.shape[:2]
    print(f"  {dataset_name}: {n_rows} × {n_cols} features")

# Containers filled by the training cell: results grouped per dataset, and
# fitted models keyed by experiment id.
experiment_results, trained_models = {}, {}

### 2.2 Entrenamiento y Evaluación Sistemática

In [None]:
print("ENTRENAMIENTO Y EVALUACIÓN DE MODELOS")
print("="*70)

# Every hyperparameter set of every model is run once per dataset variant.
total_experiments = sum(len(config['params']) for config in model_configs.values()) * len(datasets)
experiment_count = 0

# Experiments that raised, keyed by experiment id -> "ExceptionType: message".
# Kept so failures stay visible after the loop instead of scrolling away.
failed_experiments = {}

for dataset_name, (X_train_exp, X_test_exp, feature_list) in datasets.items():
    print(f"\nDATASET: {dataset_name.upper()} ({X_train_exp.shape[1]} features)")
    print("-" * 60)

    dataset_results = {}

    for model_name, model_config in model_configs.items():
        model_class = model_config['class']

        for param_name, params in model_config['params'].items():
            experiment_count += 1
            experiment_id = f"{dataset_name}_{model_name}_{param_name}"

            print(f"\n[{experiment_count}/{total_experiments}] {experiment_id}")
            print(f"Parámetros: {params}")

            # perf_counter is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments.
            start_time = time.perf_counter()

            try:
                # Build and fit the model on the full training matrix
                model = model_class(**params, random_state=42)
                model.fit(X_train_exp, y_train)

                # Predictions on the training data itself
                y_train_pred = model.predict(X_train_exp)

                # Training-set report (not an estimate of generalization)
                train_report = ModelEvaluator.classification_report(y_train, y_train_pred)

                # 5-fold cross-validation.
                # NOTE(review): the already-fitted instance is handed to
                # cross_validate; if that helper refits it in place, the model
                # stored below would end up trained on a fold subset — confirm.
                cv_results = ModelEvaluator.cross_validate(
                    model, X_train_exp, y_train, cv=5, random_state=42
                )

                # Covers fit, training prediction, report and cross-validation
                training_time = time.perf_counter() - start_time

                # Collect everything the later analysis cells read
                result = {
                    'model': model,
                    'dataset': dataset_name,
                    'model_type': model_name,
                    'params': params,
                    'train_report': train_report,
                    'cv_results': cv_results,
                    'training_time': training_time,
                    'n_features': X_train_exp.shape[1],
                    'feature_names': feature_list
                }

                dataset_results[experiment_id] = result
                trained_models[experiment_id] = model

                # Headline metrics for this experiment
                print(f"  Tiempo: {training_time:.2f}s")
                print(f"  Train Accuracy: {train_report['accuracy']:.4f}")
                print(f"  CV Accuracy: {cv_results['accuracy']['mean']:.4f} ± {cv_results['accuracy']['std']:.4f}")
                print(f"  CV F1-Score: {cv_results['f1_macro']['mean']:.4f} ± {cv_results['f1_macro']['std']:.4f}")

            except Exception as e:
                # Best-effort sweep: one failing configuration must not abort
                # the rest, but keep the exception type and remember the
                # failure so it can be summarized after the loop.
                failure = f"{type(e).__name__}: {e}"
                failed_experiments[experiment_id] = failure
                print(f"  ERROR: {failure}")

    experiment_results[dataset_name] = dataset_results

completed_experiments = sum(len(results) for results in experiment_results.values())
print(f"\n\nEXPERIMENTOS COMPLETADOS: {completed_experiments}")

# Surface failures explicitly; the analysis cells only see stored results.
if failed_experiments:
    print(f"EXPERIMENTOS FALLIDOS: {len(failed_experiments)}")
    for failed_id, failure in failed_experiments.items():
        print(f"  {failed_id}: {failure}")

## 3. Análisis de Resultados

### 3.1 Comparación de Rendimiento por Dataset

In [None]:
print("ANÁLISIS COMPARATIVO DE RESULTADOS")
print("="*70)


def _comparison_row(exp_id, dataset_name, result):
    """Flatten one stored experiment result into a row of the comparison table."""
    cv = result['cv_results']
    return {
        'Experiment': exp_id,
        'Dataset': dataset_name,
        'Model': result['model_type'],
        'Features': result['n_features'],
        'Train_Accuracy': result['train_report']['accuracy'],
        'CV_Accuracy_Mean': cv['accuracy']['mean'],
        'CV_Accuracy_Std': cv['accuracy']['std'],
        'CV_F1_Mean': cv['f1_macro']['mean'],
        'CV_F1_Std': cv['f1_macro']['std'],
        'CV_Precision_Mean': cv['precision_macro']['mean'],
        'CV_Recall_Mean': cv['recall_macro']['mean'],
        'Training_Time': result['training_time'],
    }


# One row per stored experiment, in dataset order then experiment order
comparison_data = [
    _comparison_row(exp_id, dataset_name, result)
    for dataset_name, dataset_results in experiment_results.items()
    for exp_id, result in dataset_results.items()
]
comparison_df = pd.DataFrame(comparison_data)

# Ten best experiments by mean cross-validated F1
top_experiments = comparison_df.nlargest(10, 'CV_F1_Mean')
top_columns = ['Experiment', 'Dataset', 'Model', 'Features', 'CV_F1_Mean', 'CV_F1_Std', 'Training_Time']

print("TOP 10 EXPERIMENTOS POR F1-SCORE (Validación Cruzada):")
print(top_experiments[top_columns].to_string(index=False))

# Aggregates per model family
print("\n\nRESUMEN POR TIPO DE MODELO:")
model_aggregations = {
    'CV_F1_Mean': ['mean', 'max', 'std'],
    'CV_Accuracy_Mean': ['mean', 'max', 'std'],
    'Training_Time': ['mean', 'std'],
}
model_summary = comparison_df.groupby('Model').agg(model_aggregations).round(4)

print(model_summary)

# Aggregates per dataset variant
print("\n\nRESUMEN POR DATASET:")
dataset_aggregations = {
    'CV_F1_Mean': ['mean', 'max', 'std'],
    'CV_Accuracy_Mean': ['mean', 'max', 'std'],
    'Features': 'first',
}
dataset_summary = comparison_df.groupby('Dataset').agg(dataset_aggregations).round(4)

print(dataset_summary)

### 3.2 Visualización de Resultados

In [None]:
# Comparative visualizations on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Heatmap: best mean CV F1 for every (dataset, model) pair
pivot_f1 = comparison_df.pivot_table(
    values='CV_F1_Mean', index='Dataset', columns='Model', aggfunc='max'
)

sns.heatmap(pivot_f1, annot=True, fmt='.3f', cmap='viridis', ax=axes[0,0])
axes[0,0].set_title('F1-Score Máximo por Modelo y Dataset')
axes[0,0].set_xlabel('Modelo')
axes[0,0].set_ylabel('Dataset')

# 2. Accuracy vs F1, one point per experiment, colored by training time
scatter = axes[0,1].scatter(comparison_df['CV_Accuracy_Mean'], comparison_df['CV_F1_Mean'], 
                           c=comparison_df['Training_Time'], cmap='plasma', alpha=0.7, s=60)
axes[0,1].set_xlabel('CV Accuracy Mean')
axes[0,1].set_ylabel('CV F1-Score Mean')
axes[0,1].set_title('Accuracy vs F1-Score (Color: Tiempo de Entrenamiento)')
plt.colorbar(scatter, ax=axes[0,1], label='Training Time (s)')

# 3. Box plot of mean CV F1 grouped by model
comparison_df.boxplot(column='CV_F1_Mean', by='Model', ax=axes[1,0])
# pandas' grouped boxplot sets a figure-level "Boxplot grouped by Model"
# suptitle; on this shared 2x2 figure it would head all four panels, so
# clear it and rely on the per-axes title below.
fig.suptitle('')
axes[1,0].set_title('Distribución F1-Score por Modelo')
axes[1,0].set_xlabel('Modelo')
axes[1,0].set_ylabel('CV F1-Score Mean')

# 4. Training time vs performance, one scatter series per model
for model in comparison_df['Model'].unique():
    model_data = comparison_df[comparison_df['Model'] == model]
    axes[1,1].scatter(model_data['Training_Time'], model_data['CV_F1_Mean'], 
                     label=model, alpha=0.7, s=60)

axes[1,1].set_xlabel('Training Time (s)')
axes[1,1].set_ylabel('CV F1-Score Mean')
axes[1,1].set_title('Tiempo de Entrenamiento vs Rendimiento')
axes[1,1].legend()
axes[1,1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### 3.3 Selección del Mejor Modelo

In [None]:
print("SELECCIÓN DEL MEJOR MODELO")
print("="*50)

# The winner is the experiment with the highest mean cross-validated F1
best_experiment_idx = comparison_df['CV_F1_Mean'].idxmax()
best_experiment = comparison_df.loc[best_experiment_idx]
best_exp_id = best_experiment['Experiment']

print(f"MEJOR EXPERIMENTO: {best_exp_id}")
print(f"  Modelo: {best_experiment['Model']}")
print(f"  Dataset: {best_experiment['Dataset']}")
print(f"  Features: {best_experiment['Features']}")
print(f"  CV F1-Score: {best_experiment['CV_F1_Mean']:.4f} ± {best_experiment['CV_F1_Std']:.4f}")
print(f"  CV Accuracy: {best_experiment['CV_Accuracy_Mean']:.4f} ± {best_experiment['CV_Accuracy_Std']:.4f}")
print(f"  Tiempo de entrenamiento: {best_experiment['Training_Time']:.2f}s")

# Look up the full stored result and the fitted model for that experiment
dataset_name = best_experiment['Dataset']
best_result = experiment_results[dataset_name][best_exp_id]
best_model = trained_models[best_exp_id]

print("\nRESULTADOS DETALLADOS DEL MEJOR MODELO:")
print("-" * 40)

# Confusion matrix taken from the stored training report (this is the
# training-set matrix, not a cross-validation one).
train_report = best_result['train_report']
print("\nMatriz de Confusión (Entrenamiento):")
print(train_report['confusion_matrix'])

print("\nMétricas por Clase:")
classes = train_report['classes']
# NOTE(review): hard-coded names; assumes positions 0/1/2 of the per-class
# metrics correspond to these labels in this order — confirm against the
# preprocessing metadata.
class_names = ['Alto', 'Bajo', 'Medio']

per_class = train_report['per_class']
for i, class_name in enumerate(class_names):
    precision = per_class['precision'][i]
    recall = per_class['recall'][i]
    f1 = per_class['f1_score'][i]
    support = per_class['support'][i]

    print(f"  {class_name:8} | Precision: {precision:.3f} | Recall: {recall:.3f} | F1: {f1:.3f} | Support: {support}")

# Global training-set metrics
print(f"\nMétricas Globales (Entrenamiento):")
print(f"  Accuracy: {train_report['accuracy']:.4f}")
print(f"  Macro Avg - Precision: {train_report['macro_avg']['precision']:.4f}")
print(f"  Macro Avg - Recall: {train_report['macro_avg']['recall']:.4f}")
print(f"  Macro Avg - F1: {train_report['macro_avg']['f1_score']:.4f}")

# Every cross-validation metric as mean ± std
print(f"\nMétricas de Validación Cruzada:")
cv_results = best_result['cv_results']
for metric, stats in cv_results.items():
    print(f"  {metric}: {stats['mean']:.4f} ± {stats['std']:.4f}")

## 4. Análisis de Importancia de Features

In [None]:
print("ANÁLISIS DE IMPORTANCIA DE FEATURES")
print("="*50)


def _shorten_label(name):
    """Truncate *name* to 30 characters, appending '...' when it was longer."""
    return name[:30] + ('...' if len(name) > 30 else '')


# Only models that expose get_feature_importance can be analyzed here
if not hasattr(best_model, 'get_feature_importance'):
    print(f"El modelo {best_experiment['Model']} no provee análisis de importancia de features")
else:
    feature_importance = best_model.get_feature_importance()
    feature_names_best = best_result['feature_names']

    # Table of (feature, importance), most important first
    importance_table = pd.DataFrame({
        'Feature': feature_names_best,
        'Importance': feature_importance
    })
    importance_df = importance_table.sort_values('Importance', ascending=False)

    print(f"TOP 15 FEATURES MÁS IMPORTANTES ({best_experiment['Model']}):")
    print(importance_df.head(15).to_string(index=False, float_format='%.4f'))

    # Horizontal bar chart of the 15 leading features
    plt.figure(figsize=(12, 8))
    top_features = importance_df.head(15)
    bar_positions = range(len(top_features))

    plt.barh(bar_positions, top_features['Importance'])
    plt.yticks(bar_positions, [_shorten_label(f) for f in top_features['Feature']])
    plt.xlabel('Importancia')
    plt.title(f'Top 15 Features - {best_experiment["Model"]} (Dataset: {best_experiment["Dataset"]})')
    # Flip the axis so the most important feature is drawn at the top
    plt.gca().invert_yaxis()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Persist the full importance table next to the other experiment outputs
    importance_csv_path = os.path.join(experiments_dir, f'feature_importance_{best_exp_id}.csv')
    importance_df.to_csv(importance_csv_path, index=False)

## 5. Guardado de Modelos y Resultados

In [None]:
print("GUARDADO DE MODELOS Y RESULTADOS")
print("="*50)

# Directory that holds the serialized models
models_dir = os.path.join(experiments_dir, 'trained_models')
os.makedirs(models_dir, exist_ok=True)

# Persist the best model together with the metadata needed to reuse it
best_model_path = os.path.join(models_dir, f'best_model_{best_exp_id}.pkl')
best_model_metadata = {
    'experiment_id': best_exp_id,
    'model_type': best_experiment['Model'],
    'dataset_used': best_experiment['Dataset'],
    'n_features': best_experiment['Features'],
    'cv_f1_score': best_experiment['CV_F1_Mean'],
    'cv_accuracy': best_experiment['CV_Accuracy_Mean'],
    'feature_names': best_result['feature_names'],
    'target_classes': metadata['target_classes'],
    'target_encoding': metadata['target_encoding'],
    'training_time': best_experiment['Training_Time']
}

ModelPersistence.save_model(best_model, best_model_path, best_model_metadata)
print(f"Mejor modelo guardado: {best_model_path}")

# Persist the five best experiments by mean CV F1
top_5_experiments = comparison_df.nlargest(5, 'CV_F1_Mean')

# nlargest returns the rows ordered from best to worst, so the 1-based
# position in that order is the rank (1 = best). The previous formula,
# len - position, assigned the highest number to the best model.
for rank, (_, row) in enumerate(top_5_experiments.iterrows(), start=1):
    exp_id = row['Experiment']
    model = trained_models[exp_id]

    model_path = os.path.join(models_dir, f'model_{exp_id}.pkl')
    model_metadata = {
        'experiment_id': exp_id,
        'model_type': row['Model'],
        'dataset_used': row['Dataset'],
        'cv_f1_score': row['CV_F1_Mean'],
        'cv_accuracy': row['CV_Accuracy_Mean'],
        'rank': rank
    }

    ModelPersistence.save_model(model, model_path, model_metadata)

print(f"Top 5 modelos guardados en: {models_dir}")

# Persist the complete experiment record in one file
results_path = os.path.join(experiments_dir, 'experiment_results.pkl')
ModelPersistence.save_results({
    'experiment_results': experiment_results,
    'comparison_df': comparison_df,
    'best_experiment_id': best_exp_id,
    'model_configs': model_configs,
    # Per dataset: training-matrix shape and number of feature names
    'datasets_info': {name: {'shape': data[0].shape, 'features': len(data[2])} 
                     for name, data in datasets.items()}
}, results_path)

print(f"Resultados completos guardados: {results_path}")

# Flat CSV copy of the comparison table
comparison_df.to_csv(os.path.join(experiments_dir, 'model_comparison.csv'), index=False)
print(f"Comparación de modelos: {os.path.join(experiments_dir, 'model_comparison.csv')}")

# Print the experiments directory as an indented tree; the nesting depth is
# the number of path separators left after removing the base directory.
print("\nARCHIVOS GENERADOS:")
for root, dirs, files in os.walk(experiments_dir):
    level = root.replace(experiments_dir, '').count(os.sep)
    indent = ' ' * 2 * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = ' ' * 2 * (level + 1)
    for file in files:
        print(f"{subindent}{file}")

## 6. Resumen Final

In [None]:
section_rule = "="*70

print("RESUMEN FINAL DEL ENTRENAMIENTO")
print(section_rule)

# Overall counts and the winning configuration
print(f"""EXPERIMENTOS REALIZADOS:
- Total de configuraciones probadas: {len(comparison_df)}
- Datasets evaluados: {len(datasets)}
- Algoritmos implementados: {len(model_configs)}
- Técnicas de selección de features aplicadas: 3 (Correlación, F-test, PCA)

MEJOR CONFIGURACIÓN:
- Experimento: {best_exp_id}
- Modelo: {best_experiment['Model']}
- Dataset: {best_experiment['Dataset']}
- Features utilizadas: {best_experiment['Features']}
- F1-Score (CV): {best_experiment['CV_F1_Mean']:.4f} ± {best_experiment['CV_F1_Std']:.4f}
- Accuracy (CV): {best_experiment['CV_Accuracy_Mean']:.4f} ± {best_experiment['CV_Accuracy_Std']:.4f}
- Tiempo de entrenamiento: {best_experiment['Training_Time']:.2f} segundos

COMPARACIÓN DE ALGORITMOS (Mejor F1-Score):
""")

# Best row (highest mean CV F1) within each model family
best_idx_per_model = comparison_df.groupby('Model')['CV_F1_Mean'].idxmax()
algo_best = comparison_df.loc[best_idx_per_model]
for _, row in algo_best.iterrows():
    print(f"- {row['Model']:15}: F1={row['CV_F1_Mean']:.4f}, Acc={row['CV_Accuracy_Mean']:.4f}, Time={row['Training_Time']:.1f}s")

print(f"""\nEFECTO DE SELECCIÓN DE FEATURES:
""")

# Best row (highest mean CV F1) within each dataset variant
best_idx_per_dataset = comparison_df.groupby('Dataset')['CV_F1_Mean'].idxmax()
dataset_best = comparison_df.loc[best_idx_per_dataset]
for _, row in dataset_best.iterrows():
    print(f"- {row['Dataset']:20}: {row['Features']:2d} features, F1={row['CV_F1_Mean']:.4f}")

# The first figure below is the spread between the best and the worst mean
# CV F1 across all experiments.
print(f"""\nOPTIMIZACIÓN LOGRADA:
- Mejora en F1-Score vs baseline: {(comparison_df['CV_F1_Mean'].max() - comparison_df['CV_F1_Mean'].min()):.4f}
- Reducción de dimensionalidad máxima: {X_train.shape[1] - comparison_df['Features'].min()} features
- Modelos entrenados y listos para producción: {len(top_5_experiments)}

ARCHIVOS GENERADOS:
- Mejor modelo: {best_model_path}
- Comparación completa: {os.path.join(experiments_dir, 'model_comparison.csv')}
- Resultados detallados: {results_path}
""")

# Closing banner
print("\n" + section_rule)
print("FASE DE MODELADO COMPLETADA EXITOSAMENTE")
print("Siguiente paso: Evaluación final en datos de test")
print(section_rule)