In [1]:
# Importaci√≥n de librer√≠as

import numpy as np
import pandas as pd
import joblib
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                           f1_score, roc_auc_score, confusion_matrix, roc_curve, auc)
import mlflow
import mlflow.sklearn

In [2]:
# Configuraci√≥n de paths
IMAGES_PATH = r"C:\Users\abrah\Documents\Maestr√≠a\Segundo_semestre\Programacion_2\Challenge_1\Im√°genes"

In [3]:
# Cargar modelo y datos
try:
    # Verificaci√≥n de archivos
    REQUIRED_FILES = ['knn_model.pkl', 'scaler.pkl', 'X_test.npy', 'y_test.npy', 'feature_names.npy']
    missing_files = [f for f in REQUIRED_FILES if not os.path.exists(os.path.join(IMAGES_PATH, f))]
    
    if missing_files:
        raise FileNotFoundError(f"Archivos faltantes:\n- " + "\n- ".join(missing_files))

    # Carga de recursos
    model = joblib.load(os.path.join(IMAGES_PATH, 'knn_model.pkl'))
    scaler = joblib.load(os.path.join(IMAGES_PATH, 'scaler.pkl'))
    X_test = np.load(os.path.join(IMAGES_PATH, 'X_test.npy'))
    y_test = np.load(os.path.join(IMAGES_PATH, 'y_test.npy'))
    feature_names = np.load(os.path.join(IMAGES_PATH, 'feature_names.npy'))

    # Conversi√≥n a DataFrame con los nombres originales
    X_test_df = pd.DataFrame(X_test, columns=feature_names)
    
    # Normalizaci√≥n (conservando nombres)
    X_test_scaled = scaler.transform(X_test_df)

except Exception as e:
    print(f"\n‚ùå Error en carga:\n{str(e)}")
    print("\nüîß Soluci√≥n posible:")
    print("- Ejecuta primero train_model.py para generar los archivos necesarios")
    print("- Verifica que los archivos .npy no est√©n corruptos")
    exit(1)


‚ùå Error en carga:
Object arrays cannot be loaded when allow_pickle=False

üîß Soluci√≥n posible:
- Ejecuta primero train_model.py para generar los archivos necesarios
- Verifica que los archivos .npy no est√©n corruptos


In [4]:
# Evaluaci√≥n del modelo

try:
    # Predicciones
    y_pred = model.predict(X_test_scaled)
    y_probs = model.predict_proba(X_test_scaled)[:, 1]
    
    # M√©tricas
    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred),
        'AUC-ROC': roc_auc_score(y_test, y_probs)
    }

    # Matriz de confusi√≥n
    cm = confusion_matrix(y_test, y_pred)
    
    # Curva ROC
    fpr, tpr, _ = roc_curve(y_test, y_probs)
    roc_auc = auc(fpr, tpr)

except Exception as e:
    print(f"\n‚ùå Error en evaluaci√≥n:\n{str(e)}")
    exit(1)


‚ùå Error en evaluaci√≥n:
name 'X_test_scaled' is not defined


In [5]:
# Visualizaci√≥n y guardado

def save_plots(conf_matrix, roc_data, save_path):
    # Matriz de confusi√≥n
    plt.figure(figsize=(6, 4))
    sns.heatmap(
        conf_matrix, 
        annot=True, 
        fmt='d', 
        cmap='Blues',
        xticklabels=['Benigno (B)', 'Maligno (M)'],
        yticklabels=['Benigno (B)', 'Maligno (M)']
    )
    plt.title("Matriz de Confusi√≥n - KNN")
    plt.xlabel("Predicci√≥n")
    plt.ylabel("Real")
    plt.savefig(os.path.join(save_path, "confusion_matrix_knn.png"))
    plt.close()
    
    # Curva ROC
    fpr, tpr, roc_auc = roc_data
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlabel("Tasa de Falsos Positivos")
    plt.ylabel("Tasa de Verdaderos Positivos")
    plt.title("Curva ROC - KNN")
    plt.legend(loc="lower right")
    plt.savefig(os.path.join(save_path, "roc_curve_knn.png"))
    plt.close()

In [6]:
# Registro en MLFlow

def log_to_mlflow(metrics, conf_matrix, run_name="KNN Evaluation"):
    mlflow.set_experiment("Breast Cancer - KNN")
    
    with mlflow.start_run(run_name=run_name):
        # Log metrics
        mlflow.log_metrics(metrics)
        
        # Log artifacts
        mlflow.log_artifacts(IMAGES_PATH)
        
        # Log confusion matrix as a plot
        mlflow.log_artifact(os.path.join(IMAGES_PATH, "confusion_matrix_knn.png"))
        mlflow.log_artifact(os.path.join(IMAGES_PATH, "roc_curve_knn.png"))
        
        print(f"Run ID: {mlflow.active_run().info.run_id}")

In [7]:
# Ejecuci√≥n principal

if __name__ == "__main__":
    try:
        # 1. Cargar y validar datos
        model, X_test_scaled, y_test = load_and_validate_data()
        
        # 2. Evaluar modelo
        metrics, conf_matrix, roc_data = evaluate_model(model, X_test_scaled, y_test)
        
        # 3. Imprimir m√©tricas
        print("\nM√©tricas de Evaluaci√≥n:")
        for name, value in metrics.items():
            print(f"{name}: {value:.4f}")
        
        # 4. Guardar gr√°ficos
        save_plots(conf_matrix, roc_data, IMAGES_PATH)
        
        # 5. Registrar en MLFlow
        log_to_mlflow(metrics, conf_matrix)
        
        print("\n Evaluaci√≥n completada exitosamente.")
    
    except Exception as e:
        print(f"\n Error durante la evaluaci√≥n: {str(e)}")


 Error durante la evaluaci√≥n: name 'load_and_validate_data' is not defined
