<a href="https://colab.research.google.com/github/JuanDiaz77/Proyecto-colab/blob/main/Balanceo_de_datos_con_SMOTE_y_evaluaci%C3%B3n_con_F1_y_AUC_ROC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===========================================
# 1️ Instalación e importación de librerías
# ===========================================

!pip install imbalanced-learn -q

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    classification_report,
    roc_auc_score,
    roc_curve,
    f1_score
)
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# ===========================================
# 2. Load and explore the real dataset
# ===========================================

# Wrap the scikit-learn breast-cancer bunch in pandas objects; the feature
# matrix keeps the column names supplied by the dataset.
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Report the dataset dimensions and the raw class counts.
n_samples, n_features = X.shape
print("✅ Dataset cargado correctamente")
print(f"Número de muestras: {n_samples}")
print(f"Número de variables: {n_features}")
print("\nDistribución de clases (0 = maligno, 1 = benigno):")
print(y.value_counts())

# Bar chart of the original class distribution.
ax = sns.countplot(x=y)
ax.set_title("Distribución de clases original")
plt.show()

# ===========================================
# 3. Split into training and test sets
# ===========================================

# Hold out 30% of the rows for testing, stratifying on y and fixing the seed
# so the split is reproducible.
split_options = dict(stratify=y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print("\nTamaño del conjunto de entrenamiento:", X_train.shape)
print("Tamaño del conjunto de prueba:", X_test.shape)

# ===========================================
# 4. Apply SMOTE to the training set
# ===========================================

# Only the training partition is resampled; X_test / y_test are not touched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Show the class counts after resampling.
resampled_counts = y_res.value_counts()
print("\nDistribución después de aplicar SMOTE:")
print(resampled_counts)

# Bar chart of the resampled class distribution.
ax = sns.countplot(x=y_res)
ax.set_title("Distribución balanceada tras SMOTE")
plt.show()

# ===========================================
# 5. Train the classification model
# ===========================================

# Fit a seeded random forest on the SMOTE-resampled training data
# (fit() returns the estimator itself, so the call can be chained).
model = RandomForestClassifier(random_state=42).fit(X_res, y_res)

# ===========================================
# 6. Prediction and evaluation with SMOTE
# ===========================================

# Hard labels feed the classification report and F1; the probability column
# at index 1 feeds the ROC metrics.
y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]

# Compute the AUC once and reuse it for both the printout and the legend.
auc_smote = roc_auc_score(y_test, y_probs)

print("\n=== Reporte de clasificación con SMOTE ===")
print(classification_report(y_test, y_pred))
print(f"AUC-ROC: {auc_smote:.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred):.4f}")

# ROC curve, with a dashed diagonal from (0, 0) to (1, 1) as reference.
fpr, tpr, _ = roc_curve(y_test, y_probs)
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(fpr, tpr, label=f"AUC = {auc_smote:.3f}")
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel("Tasa de falsos positivos (FPR)")
ax.set_ylabel("Tasa de verdaderos positivos (TPR)")
ax.set_title("Curva ROC - Modelo con SMOTE")
ax.legend()
plt.show()

# ===========================================
# 7. Comparison against a model trained without SMOTE
# ===========================================

# Baseline: same classifier and seed, fitted on the original
# (non-resampled) training split.
model_no_smote = RandomForestClassifier(random_state=42).fit(X_train, y_train)

y_pred_ns = model_no_smote.predict(X_test)
y_probs_ns = model_no_smote.predict_proba(X_test)[:, 1]

# Compute the baseline AUC once for both the printout and the legend.
auc_no_smote = roc_auc_score(y_test, y_probs_ns)

print("\n=== Reporte de clasificación sin SMOTE ===")
print(classification_report(y_test, y_pred_ns))
print(f"AUC-ROC: {auc_no_smote:.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_ns):.4f}")

# Overlay both ROC curves; fpr / tpr / y_probs come from the SMOTE model
# evaluated earlier in the script.
fpr_ns, tpr_ns, _ = roc_curve(y_test, y_probs_ns)
auc_with_smote = roc_auc_score(y_test, y_probs)

fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(fpr, tpr, label=f"Con SMOTE (AUC={auc_with_smote:.3f})")
ax.plot(fpr_ns, tpr_ns, label=f"Sin SMOTE (AUC={auc_no_smote:.3f})")
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel("Tasa de falsos positivos")
ax.set_ylabel("Tasa de verdaderos positivos")
ax.set_title("Comparación de curvas ROC: Con vs Sin SMOTE")
ax.legend()
plt.show()
