# Métodos de Validación

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler

def hold_out(X, y, r=0.2):
    """Split ``X`` and ``y`` into a training part and a test part (hold-out).

    Args:
        X: Feature data, forwarded unchanged to ``train_test_split``.
        y: Target data, forwarded unchanged to ``train_test_split``.
        r: Fraction of the samples reserved for the test set. Must lie
            strictly between 0 and 1. Defaults to 0.2.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``r`` is not strictly between 0 and 1.
    """
    fraction_is_valid = 0 < r < 1
    if not fraction_is_valid:
        raise ValueError("r debe estar entre 0 y 1")

    # The fixed seed makes the partition reproducible from run to run.
    parts = train_test_split(X, y, test_size=r, random_state=42)
    train_X, test_X, train_y, test_y = parts
    return train_X, test_X, train_y, test_y

def k_fold_cross_validation(X, y, k=5):
    """Build a shuffled K-fold splitter with ``k`` folds.

    Args:
        X: Unused; accepted so all validation helpers share one signature.
        y: Unused; accepted so all validation helpers share one signature.
        k: Number of folds. Any integral value greater than 1 is accepted,
            including NumPy integer scalars such as ``np.int64(5)``.
            Defaults to 5.

    Returns:
        A ``KFold`` instance configured with ``n_splits=k``, shuffling
        enabled and a fixed seed. The caller invokes ``.split(...)`` on it.

    Raises:
        ValueError: If ``k`` is not an integer or is not greater than 1.
    """
    # Function-scope import so the block does not depend on file-level edits.
    from numbers import Integral

    # ``Integral`` covers builtin ints as well as NumPy integer scalars,
    # which a plain ``isinstance(k, int)`` check would reject.
    if not isinstance(k, Integral) or k <= 1:
        raise ValueError("k debe ser un entero mayor que 1")

    # The fixed seed keeps the fold assignment reproducible.
    return KFold(n_splits=int(k), shuffle=True, random_state=42)

def leave_one_out(X, y):
    """Build a leave-one-out splitter.

    Args:
        X: Unused; accepted so all validation helpers share one signature.
        y: Unused; accepted so all validation helpers share one signature.

    Returns:
        A fresh ``LeaveOneOut`` instance. The caller invokes ``.split(...)``
        or ``.get_n_splits(...)`` on it.
    """
    return LeaveOneOut()

# ---  CARGA Y PREPROCESAMIENTO DE DATOS ---
def _scale_numeric_columns(X):
    """Standardize the numeric columns of ``X`` in place and return ``X``."""
    numerical_cols = X.select_dtypes(include=np.number).columns
    X[numerical_cols] = StandardScaler().fit_transform(X[numerical_cols])
    return X


def load_and_preprocess(dataset_name):
    """Load one of the supported CSV datasets and prepare features and target.

    Supported names:

    * ``"credit"`` reads ``Dataset/default_credit_card_clients.csv``, drops
      the ``ID`` column and uses ``default payment next month`` as target.
    * ``"bank"`` reads ``Dataset/bank_full.csv`` (``;``-delimited), maps the
      ``y`` column to 1 for ``"yes"`` and 0 otherwise, and one-hot encodes
      the features with ``pd.get_dummies``.

    In both cases the numeric feature columns are standardized afterwards.

    NOTE(review): the scaler is fitted on the whole dataset, before any
    train/test split is made, so test-set statistics influence the scaling.

    Args:
        dataset_name: Either ``"credit"`` or ``"bank"``.

    Returns:
        ``(X, y)`` on success. ``(None, None)`` if the CSV file is missing,
        the name is not supported, or any other error occurs; in those
        cases a message is printed instead of an exception being raised.
    """
    # Function-scope import so the block does not depend on file-level edits.
    from pathlib import Path

    # Joining with pathlib keeps the relative paths valid on every OS; the
    # previous hard-coded backslash separator only resolved on Windows.
    dataset_dir = Path("Dataset")

    try:
        if dataset_name == "credit":
            data = pd.read_csv(dataset_dir / "default_credit_card_clients.csv")
            data = data.drop("ID", axis=1)  # identifier, not a feature
            y = data["default payment next month"]
            X = data.drop("default payment next month", axis=1)
        elif dataset_name == "bank":
            data = pd.read_csv(dataset_dir / "bank_full.csv", delimiter=";")
            y = (data["y"] == "yes").astype(int)  # binary target as 0/1
            X = pd.get_dummies(data.drop("y", axis=1))
            # NOTE(review): scaling happens after one-hot encoding, so
            # whether the dummy columns are scaled too depends on the dtype
            # get_dummies gives them - confirm this is intended.
        else:
            raise ValueError("Nombre de dataset inválido. Debe ser 'credit' o 'bank'.")

        return _scale_numeric_columns(X), y
    except FileNotFoundError:
        print("Error: No se encontró el archivo CSV.")
        return None, None
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure
        # through the (None, None) return value that callers check for.
        print(f"Un error ocurrió: {e}")
        return None, None

# --- EJEMPLO DE USO ---

# Load both datasets up front so any loading errors are reported first.
X_credit, y_credit = load_and_preprocess("credit")
X_bank, y_bank = load_and_preprocess("bank")

# Run the same three validation schemes on every dataset that loaded.
for header, X_data, y_data in (
    ("\n--- Resultados para default_credit_card_clients.csv ---", X_credit, y_credit),
    ("\n--- Resultados para bank-full.csv ---", X_bank, y_bank),
):
    if X_data is None or y_data is None:
        continue  # loading failed; the loader already printed the reason

    print(header)

    # Hold-out with 30% of the samples reserved for testing.
    X_train_ho, X_test_ho, y_train_ho, y_test_ho = hold_out(X_data, y_data, r=0.3)
    print("Hold-out: Tamaños de los conjuntos:", len(X_train_ho), len(X_test_ho))

    # 5-fold cross-validation: report the size of each train/test split.
    kf = k_fold_cross_validation(X_data, y_data, k=5)
    print("K-fold:")
    for fold_no, (train_index, test_index) in enumerate(kf.split(X_data), start=1):
        print(f"  Fold {fold_no}:", len(train_index), len(test_index))

    # Leave-one-out: only the number of splits is reported.
    loo = leave_one_out(X_data, y_data)
    print("Leave-One-Out: Número de splits:", loo.get_n_splits(X_data))


--- Resultados para default_credit_card_clients.csv ---
Hold-out: Tamaños de los conjuntos: 21000 9000
K-fold:
  Fold 1: 24000 6000
  Fold 2: 24000 6000
  Fold 3: 24000 6000
  Fold 4: 24000 6000
  Fold 5: 24000 6000
Leave-One-Out: Número de splits: 30000

--- Resultados para bank-full.csv ---
Hold-out: Tamaños de los conjuntos: 31647 13564
K-fold:
  Fold 1: 36168 9043
  Fold 2: 36169 9042
  Fold 3: 36169 9042
  Fold 4: 36169 9042
  Fold 5: 36169 9042
Leave-One-Out: Número de splits: 45211
