In [111]:
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.base import clone
from sklearn.metrics import precision_score, recall_score, f1_score
from itertools import product
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas as pd
import numpy as np
import warnings

# Silence every warning category for the rest of the kernel session.
# NOTE(review): a blanket "ignore" filter also hides scikit-learn convergence
# and deprecation warnings raised by later fits -- consider narrowing it to the
# specific categories that are known to be noise.
warnings.filterwarnings("ignore")


In [113]:
# Load the HR attrition dataset.
# The path is a raw string so the backslashes of the Windows path are taken
# literally: "\P" and "\h" are not valid escape sequences and a plain literal
# triggers a SyntaxWarning on Python 3.12+ (planned to become an error).
df = pd.read_excel(r"C:\Proyecto\hhrr_dataset.xlsx")

# Features: every column except the target.
X = df.drop(columns=["Attrition"])

# Target: encode the "Yes"/"No" labels as 1/0.
y = df["Attrition"].map({"Yes": 1, "No": 0})

# Series.map yields NaN for any value missing from the mapping (or already
# missing in the sheet). Fail here with a clear message instead of letting NaN
# labels reach the stratified split and the model fits.
if y.isna().any():
    unexpected = df.loc[y.isna(), "Attrition"].unique().tolist()
    raise ValueError(
        f"Unexpected or missing 'Attrition' labels (expected 'Yes'/'No'): {unexpected}"
    )

In [117]:

# Hold out a stratified 10% test split; the remaining 90% is the
# train/validation pool used by the cross-validation further down.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    random_state=42,
    stratify=y,
)

# Column groups by dtype: object columns are handled as categorical,
# everything else as numeric.
num_cols = X.select_dtypes(exclude="object").columns.to_list()
cat_cols = X.select_dtypes(include="object").columns.to_list()

# Numeric branch: fill gaps with the column mean, then standardise.
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: fill gaps with the mode, then one-hot encode.
# Categories unseen during fit are ignored at transform time.
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column group through its own branch.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, num_cols),
        ("cat", categorical_transformer, cat_cols),
    ]
)

# Models and hyperparameter grids.
# Maps a display name to (base estimator, grid); the grid is a dict of
# parameter name -> list of candidate values, expanded as a full Cartesian
# product by the search loop.
#
# Estimators with internal randomness are seeded with the same seed (42) used
# for the data split and the CV splitter, so repeated runs give the same scores.
model_configs = {
    "SVM": (
        SVC(),
        {
            "C": [0.01, 0.1, 1, 10, 100],
            "kernel": ["linear", "rbf", "poly", "sigmoid"],
            "gamma": ["scale", "auto"],
            "degree": [2, 3, 4],
        },  # 120 combinations
        # NOTE: `degree` is only used by the "poly" kernel and `gamma` is
        # ignored by "linear", so many of these combinations fit identical models.
    ),
    "LogisticRegression": (
        # liblinear and saga shuffle the data internally -> seed them.
        LogisticRegression(max_iter=1000, random_state=42),
        {
            "C": [0.01, 0.1, 1, 10, 100],
            "penalty": ["l1", "l2"],
            "solver": ["liblinear", "saga"],  # 'sag' left out: it does not support l1
            "class_weight": [None, "balanced"],
        },  # 40 combinations
    ),
    "DecisionTree": (
        # Candidate features are permuted at each split -> seed for reproducibility.
        DecisionTreeClassifier(random_state=42),
        {
            "max_depth": list(range(3, 13)),
            "min_samples_split": [2, 5, 10],
            "criterion": ["gini", "entropy", "log_loss"],  # "log_loss" needs scikit-learn >= 1.1
            "max_features": [None, "sqrt", "log2"],  # 'auto' intentionally omitted
        },  # 270 combinations
    ),
    "RandomForest": (
        # Bootstrap sampling and feature sub-sampling are random -> seed.
        RandomForestClassifier(random_state=42),
        {
            "n_estimators": [50, 100, 150, 200, 250, 300],
            "max_depth": [None, 10, 20, 30, 40],
            "max_features": ["sqrt", "log2", None],
            "bootstrap": [True, False],
            "min_samples_split": [2, 4, 6],
        },  # 540 combinations
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            "n_neighbors": list(range(1, 21)),
            "weights": ["uniform", "distance"],
            "p": [1, 2, 3, 4, 5],  # Minkowski power parameter
        },  # 200 combinations
    ),
    "GaussianNB": (
        GaussianNB(),
        {
            "var_smoothing": np.logspace(-9, -6, 5),
        },  # only 5 combinations, but it rounds out the comparison
    ),
}

# Inner CV: 10 stratified folds over the train/validation pool.
inner_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# The splitter is seeded, so every call to split() yields the same folds.
# Materialise them once instead of regenerating them for each hyperparameter
# combination (loop-invariant work).
cv_folds = list(inner_cv.split(X_trainval, y_trainval))


def _precision_recall_f1(y_true, y_pred):
    """Return (precision, recall, F1) of the positive class, rounded to 4 decimals.

    `zero_division=0` makes an undefined score (no positive predictions or no
    positive labels) count as 0 instead of raising a warning.
    """
    return tuple(
        round(metric(y_true, y_pred, zero_division=0), 4)
        for metric in (precision_score, recall_score, f1_score)
    )


# One row per (model, hyperparameter combination, fold).
results = []

for model_name, (model_class, param_grid) in model_configs.items():
    print(f"\n🔁 Modelo: {model_name}")
    # Full Cartesian product of the grid, one dict per combination.
    param_combinations = [dict(zip(param_grid, v)) for v in product(*param_grid.values())]

    for params in param_combinations:
        print(f"⚙️ Hiperparámetros: {params}")
        for fold_idx, (train_idx, val_idx) in enumerate(cv_folds, start=1):
            X_tr, X_val = X_trainval.iloc[train_idx], X_trainval.iloc[val_idx]
            y_tr, y_val = y_trainval.iloc[train_idx], y_trainval.iloc[val_idx]

            # Fresh estimator with the current hyperparameters. The preprocessor
            # is cloned as well: Pipeline does not copy its steps, so reusing the
            # module-level instance would refit that shared object on every fold.
            model = clone(model_class).set_params(**params)
            pipeline = Pipeline([
                ("preprocessor", clone(preprocessor)),
                ("classifier", model)
            ])

            pipeline.fit(X_tr, y_tr)

            # Validation-fold metrics.
            y_val_pred = pipeline.predict(X_val)
            precision_val, recall_val, f1_val = _precision_recall_f1(y_val, y_val_pred)

            # Held-out test metrics for the same fitted pipeline.
            # NOTE(review): these are recorded for every fold and configuration;
            # they should not be used to choose hyperparameters, otherwise the
            # test split stops being an unbiased estimate.
            y_test_pred = pipeline.predict(X_test)
            precision_test, recall_test, f1_test = _precision_recall_f1(y_test, y_test_pred)

            # Store validation and test metrics together in a single row.
            results.append({
                "Modelo": model_name,
                "Fold": fold_idx,
                "Precision_Val": precision_val,
                "Recall_Val": recall_val,
                "F1_Val": f1_val,
                "Precision_Test": precision_test,
                "Recall_Test": recall_test,
                "F1_Test": f1_test,
                "Hiperparametros": params
            })

# Persist every per-fold row to a single CSV, then preview the first rows.
df_combined = pd.DataFrame(data=results)
df_combined.to_csv(
    path_or_buf="resultados_fold_val_test_unificado.csv",
    index=False,
)

print("\n📄 Muestra del archivo consolidado:", df_combined.head(), sep="\n")


🔁 Modelo: SVM
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'linear', 'gamma': 'scale', 'degree': 2}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'linear', 'gamma': 'scale', 'degree': 3}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'linear', 'gamma': 'scale', 'degree': 4}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'linear', 'gamma': 'auto', 'degree': 2}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'linear', 'gamma': 'auto', 'degree': 3}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'linear', 'gamma': 'auto', 'degree': 4}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'rbf', 'gamma': 'scale', 'degree': 2}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'rbf', 'gamma': 'scale', 'degree': 3}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'rbf', 'gamma': 'scale', 'degree': 4}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'rbf', 'gamma': 'auto', 'degree': 2}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'rbf', 'gamma': 'auto', 'degree': 3}
⚙️ Hiperparámetros: {'C': 0.01, 'kernel': 'rbf', 'gamma': 'auto', 'degree': 4}
⚙️ Hiperparám