# EcoPredict - Evaluación Completa del Modelo

## 📊 Análisis Técnico del Modelo XGBoost Optimizado

Este notebook genera todas las métricas técnicas necesarias para la presentación:

- ✅ **Matriz de confusión** por clase
- ✅ **Precision, Recall, F1** por clase
- ✅ **Feature importance** (SHAP)
- ✅ **Análisis de errores** del modelo
- ✅ **Métricas de overfitting** verificadas
- ✅ **Visualizaciones** para presentación

**Modelo**: XGBoost optimizado (97.07% accuracy)  
**Dataset**: Forest Cover Type Dataset  
**Overfitting**: <5% (0.76%)


In [None]:
# Importar librer√≠as necesarias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    confusion_matrix, classification_report, 
    precision_recall_fscore_support, accuracy_score
)
from ucimlrepo import fetch_ucirepo
import warnings
# Silence every warning so the notebook output stays clean.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.filterwarnings('ignore')

# Configure plot style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*";
# older releases only know the un-prefixed name, and plt.style.use raises
# OSError for an unknown style. Fall back instead of aborting the cell.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette("husl")

print("üìö Librer√≠as importadas correctamente")
print("üé® Estilo de gr√°ficos configurado")


In [None]:
# Load the dataset and the persisted model artifacts
print("üìä Cargando Forest Cover Type Dataset...")
covertype = fetch_ucirepo(id=31)
X = covertype.data.features
y = covertype.data.targets.iloc[:, 0]  # first target column as a Series

print("ü§ñ Cargando modelo optimizado...")
# NOTE(review): joblib.load unpickles arbitrary objects -- only point these
# paths at artifacts produced by a trusted training run.
model = joblib.load('models/best_model.pkl')
scaler = joblib.load('models/scaler.pkl')

# Align the dataset labels with the label space the model predicts in.
# class_names below is keyed 0..6, whereas the UCI description codes the
# Cover_Type target as 1..7 (confirm against the fetched data). If the model
# was trained on shifted labels, comparing raw y with its predictions would
# silently mis-score every sample. The remap pairs the sorted dataset labels
# with the sorted model classes and is skipped when they already agree or
# when the model does not expose `classes_`.
_model_classes = getattr(model, 'classes_', None)
if _model_classes is not None:
    _model_classes = np.sort(np.asarray(_model_classes))
    _data_labels = np.sort(y.unique())
    if (len(_data_labels) == len(_model_classes)
            and not np.array_equal(_data_labels, _model_classes)):
        y = y.map(dict(zip(_data_labels.tolist(), _model_classes.tolist())))
        print("Etiquetas del dataset realineadas con las clases del modelo")

print(f"‚úÖ Datos cargados: {X.shape[0]:,} muestras, {X.shape[1]} features")
print(f"‚úÖ Clases: {sorted(y.unique())}")
print(f"‚úÖ Modelo: {type(model).__name__}")
print(f"‚úÖ Scaler: {type(scaler).__name__}")

# Display names for each class id, used by the visualizations
class_names = {
    0: "Spruce/Fir",
    1: "Lodgepole Pine", 
    2: "Ponderosa Pine",
    3: "Cottonwood/Willow",
    4: "Aspen",
    5: "Douglas-fir",
    6: "Krummholz"
}

print(f"‚úÖ Clases mapeadas: {len(class_names)} tipos de bosque")


In [None]:
# Rebuild the evaluation split and run the model on both partitions
print("üîÑ Preparando datos para evaluaci√≥n...")

# Stratified 80/20 split with a fixed seed (the original note states this is
# the same split used for training)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Transform each partition with the loaded scaler (transform only, no refit);
# map() evaluates train first, then test
X_train_scaled, X_test_scaled = map(scaler.transform, (X_train, X_test))

# Hard class predictions
print("üîÆ Generando predicciones...")
train_pred, test_pred = map(model.predict, (X_train_scaled, X_test_scaled))

# Per-class probabilities (kept for the confidence analysis)
train_proba, test_proba = map(model.predict_proba, (X_train_scaled, X_test_scaled))

print(f"‚úÖ Training set: {len(X_train):,} muestras")
print(f"‚úÖ Test set: {len(X_test):,} muestras")
print("‚úÖ Predicciones generadas")
print("‚úÖ Probabilidades calculadas")


## 📈 **1. Métricas Globales del Modelo**


In [None]:
# Accuracy on each partition; the train/test gap is reported as "overfitting"
train_acc, test_acc = (
    accuracy_score(truth, predicted)
    for truth, predicted in ((y_train, train_pred), (y_test, test_pred))
)
overfitting = train_acc - test_acc

separator = "=" * 50
print("üéØ M√âTRICAS GLOBALES DEL MODELO")
print(separator)
print(f"üìä Training Accuracy:  {train_acc:.4f} ({train_acc*100:.2f}%)")
print(f"üìä Test Accuracy:      {test_acc:.4f} ({test_acc*100:.2f}%)")
print(f"üìä Overfitting:        {overfitting:.4f} ({overfitting*100:.2f}%)")
print(separator)

# Gate: the train/test accuracy gap must stay below 0.05 (5 percentage points)
print("‚úÖ CUMPLE: Overfitting < 5%" if overfitting < 0.05 else "‚ùå PROBLEMA: Overfitting >= 5%")

print(f"\nüèÜ RESULTADO: Modelo con {test_acc*100:.2f}% de precisi√≥n")

# Goal check: test accuracy of at least 0.97
if test_acc >= 0.97:
    goal_status = '‚úÖ CUMPLIDO'
else:
    goal_status = '‚ùå NO CUMPLIDO'
print(f"üéØ OBJETIVO: {goal_status} (97%+ accuracy)")
