In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut
from sklearn.datasets import load_iris

# Iris: ask for pandas objects (as_frame=True) so the data can be indexed with .iloc.
iris = load_iris(as_frame=True)
X_iris, y_iris = iris.data, iris.target

# Red-wine quality data from the UCI repository; the file is read with ';' as separator.
wine = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv',
    sep=';',
)
y_wine = wine['quality']  # target column
X_wine = wine.drop(columns='quality')  # every column except the 'quality' target

# Función para Hold Out Validation
def hold_out_validation(X, y, r):
    """Split ``X`` and ``y`` once into a training part and a test part.

    Args:
        X: Feature data, forwarded unchanged to ``train_test_split``.
        y: Target data, forwarded unchanged to ``train_test_split``.
        r: Value passed as ``test_size`` (0.3 is used by the callers in this
            file to hold out 30 % of the rows).

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # random_state is pinned to 42 so repeated calls produce the same partition.
    partitions = train_test_split(X, y, test_size=r, random_state=42)
    train_features, test_features, train_targets, test_targets = partitions
    return train_features, test_features, train_targets, test_targets

# Función para K-Fold Cross Validation
def k_fold_validation(X, y, K):
    """Lazily generate the train/test partitions of a K-fold cross validation.

    Args:
        X: Feature data; indexed positionally through ``.iloc``.
        y: Target data; indexed positionally through ``.iloc``.
        K: Number of folds, passed to ``KFold`` as ``n_splits``.

    Yields:
        One tuple ``(X_train, X_test, y_train, y_test)`` per fold.
    """
    # Shuffling is enabled with a fixed seed (42) so the folds are reproducible.
    splitter = KFold(n_splits=K, shuffle=True, random_state=42)
    for fit_rows, eval_rows in splitter.split(X):
        yield (
            X.iloc[fit_rows],
            X.iloc[eval_rows],
            y.iloc[fit_rows],
            y.iloc[eval_rows],
        )

# Función para Leave-One-Out Cross Validation
def leave_one_out_validation(X, y):
    """Lazily generate the train/test partitions of a leave-one-out validation.

    Args:
        X: Feature data; indexed positionally through ``.iloc``.
        y: Target data; indexed positionally through ``.iloc``.

    Yields:
        One tuple ``(X_train, X_test, y_train, y_test)`` for every split
        produced by ``LeaveOneOut().split(X)``.
    """
    splitter = LeaveOneOut()
    for fit_rows, held_out_rows in splitter.split(X):
        yield (
            X.iloc[fit_rows],
            X.iloc[held_out_rows],
            y.iloc[fit_rows],
            y.iloc[held_out_rows],
        )

# Hold-out on the Iris dataset (test_size = 0.3).
X_train, X_test, y_train, y_test = hold_out_validation(X_iris, y_iris, 0.3)
print(
    "Hold Out - Iris dataset:",
    f"Training set size: {len(X_train)}",
    f"Test set size: {len(X_test)}\n",
    sep="\n",
)

# Hold-out on the Wine Quality dataset (test_size = 0.3).
X_train, X_test, y_train, y_test = hold_out_validation(X_wine, y_wine, 0.3)
print(
    "Hold Out - Wine Quality dataset:",
    f"Training set size: {len(X_train)}",
    f"Test set size: {len(X_test)}\n",
    sep="\n",
)

# 5-fold cross validation on the Wine Quality dataset; folds are numbered from 1.
print("K-Fold - Wine Quality dataset:")
for i, (X_train, X_test, y_train, y_test) in enumerate(
    k_fold_validation(X_wine, y_wine, 5), start=1
):
    print(f"Fold {i}: Training set size: {len(X_train)}, Test set size: {len(X_test)}")

# Leave-one-out on the Iris dataset.
print("\nLeave-One-Out - Iris dataset (first 5 iterations):")
for i, (X_train, X_test, y_train, y_test) in enumerate(
    leave_one_out_validation(X_iris, y_iris)
):
    # Only the first 5 iterations are reported to keep the output short.
    if i >= 5:
        break
    print(f"Iteration {i + 1}: Training set size: {len(X_train)}, Test set size: {len(X_test)}")


Hold Out - Iris dataset:
Training set size: 105
Test set size: 45

Hold Out - Wine Quality dataset:
Training set size: 1119
Test set size: 480

K-Fold - Wine Quality dataset:
Fold 1: Training set size: 1279, Test set size: 320
Fold 2: Training set size: 1279, Test set size: 320
Fold 3: Training set size: 1279, Test set size: 320
Fold 4: Training set size: 1279, Test set size: 320
Fold 5: Training set size: 1280, Test set size: 319

Leave-One-Out - Iris dataset (first 5 iterations):
Iteration 1: Training set size: 149, Test set size: 1
Iteration 2: Training set size: 149, Test set size: 1
Iteration 3: Training set size: 149, Test set size: 1
Iteration 4: Training set size: 149, Test set size: 1
Iteration 5: Training set size: 149, Test set size: 1
