<a href="https://colab.research.google.com/github/Haz-mor/Render_DATA32/blob/main/Modelos_Supervisados_Comparacion_Sprint9_S2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sprint 9 - Sesión 2

## Comparación de Modelos Supervisados: Árbol de Decisión, Regresión Logística y Random Forest

In [None]:
import pandas as pd

# Load the dataset from a CSV file located next to the notebook
DATA_FILE = "users_behavior.csv"
df = pd.read_csv(DATA_FILE)
df.head()

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns



In [None]:
# Data preparation: the target is the "is_ultra" column, the features are every other column
TARGET = "is_ultra"
y = df[TARGET]
X = df.drop(columns=[TARGET])


In [None]:
# Escalar valores
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Se agrega stratify para evitar que un conjunto esté desbalanceado respecto al otro
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

In [None]:
# Base estimators, each created with random_state=42
dt = DecisionTreeClassifier(random_state=42)
lr = LogisticRegression(random_state=42, max_iter=1000)
rf = RandomForestClassifier(random_state=42)

# Hyperparameter grids explored for each estimator
param_dt = {
    'max_depth': [3, 5, 7, 10],        # depth of the tree
    'criterion': ['gini', 'entropy'],  # split functions
}
param_lr = {
    'C': [0.01, 0.1, 1, 10],  # inverse of the regularization strength
}
param_rf = {
    'n_estimators': [50, 100, 200],  # number of trees
    'max_depth': [3, 5, 10],         # maximum depth of each tree
}

# Each parameter combination is evaluated with 5-fold cross-validation (cv=5)
grid_dt, grid_lr, grid_rf = (
    GridSearchCV(estimator, grid, cv=5)
    for estimator, grid in ((dt, param_dt), (lr, param_lr), (rf, param_rf))
)

In [None]:
# Grid searches keyed by display name; insertion order sets the row order of the table
results = {
    "Decision Tree": grid_dt,
    "Logistic Regression": grid_lr,
    "Random Forest": grid_rf,
}

# Training: run every grid search on the training split
for search in results.values():
    search.fit(X_train, y_train)


def _metrics_row(name, model):
    """Return one summary row for `model` evaluated on the test split.

    Accuracy is taken from the overall report; precision, recall and
    F1-score are read from the report entry keyed "1".
    """
    report = classification_report(y_test, model.predict(X_test), output_dict=True)
    class_one = report["1"]
    return {
        "Model": name,
        "Accuracy": report["accuracy"],
        "Precision": class_one["precision"],
        "Recall": class_one["recall"],
        "F1-score": class_one["f1-score"],
    }


# Evaluation: one row per fitted search, collected into a single comparison table
metrics = [_metrics_row(name, model) for name, model in results.items()]

df_metrics = pd.DataFrame(metrics)
df_metrics

In [None]:
# Show the best hyperparameters found by each grid search, one row per model
best_params = {
    label: search.best_params_
    for label, search in (
        ("Decision Tree", grid_dt),
        ("Logistic Regression", grid_lr),
        ("Random Forest", grid_rf),
    )
}

# orient='index' turns each model into a row; parameters a model lacks show up as NaN
df_best_params = pd.DataFrame.from_dict(best_params, orient='index')
df_best_params

In [None]:
# Confusion matrices: one heatmap per model, laid out side by side
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 5))
for (name, model), ax in zip(results.items(), axes):
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    # confusion_matrix puts true labels on rows and predicted labels on columns
    ax.set(title=name, xlabel="Predicted", ylabel="Actual")

fig.tight_layout()
plt.show()