In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris, load_wine, load_breast_cancer


In [2]:
# Load the datasets: map a display name to the loaded scikit-learn dataset.
# Each loader is called exactly once, in the order listed, so iteration order
# over `datasets` is Iris -> Wine -> Breast Cancer.
datasets = {
    label: loader()
    for label, loader in (
        ("Iris", load_iris),
        ("Wine", load_wine),
        ("Breast Cancer", load_breast_cancer),
    )
}


In [3]:
# Stratified hold-out validation (70/30 by default)
def hold_out_validation(data, k, test_size=0.3, random_state=42):
    """Evaluate a KNN classifier on a single stratified train/test split.

    Parameters
    ----------
    data : object
        Anything exposing ``.data`` (feature matrix) and ``.target``
        (class labels); both attributes are read directly.
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.
    test_size : float, default 0.3
        Fraction of the samples held out for testing.
    random_state : int, default 42
        Seed for the split, so repeated calls give the same partition.

    Returns
    -------
    tuple
        ``(acc, cm)``: the test-set accuracy and the confusion matrix.
        Both are also printed, exactly as before.

    Notes
    -----
    NOTE(review): features are fed to KNN as-is (no standardisation).
    KNN is distance-based, so features with large numeric ranges may
    dominate; consider a scaling pipeline if that is not intended.
    """
    X, y = data.data, data.target
    # stratify=y keeps the class proportions the same in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    # KNN classifier
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    # Prediction and metrics
    y_pred = knn.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("Accuracy:", acc)
    print("Matriz de Confusión:\n", cm)

    # Returned so callers can tabulate/compare results instead of
    # scraping stdout; callers that ignore the value are unaffected.
    return acc, cm


In [4]:
# Stratified K-fold cross-validation (10 folds by default)
def cross_validation(data, k, n_splits=10, random_state=42):
    """Evaluate a KNN classifier with shuffled, stratified K-fold CV.

    Parameters
    ----------
    data : object
        Anything exposing ``.data`` (feature matrix) and ``.target``
        (class labels); both attributes are read directly.
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.
    n_splits : int, default 10
        Number of folds.
    random_state : int, default 42
        Seed for the fold shuffling, so the folds are reproducible.

    Returns
    -------
    float
        Mean of the per-fold scores returned by ``cross_val_score``.
        The same value is also printed.
    """
    X, y = data.data, data.target
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # KNN classifier
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=skf)

    mean_score = np.mean(scores)
    # With the default n_splits=10 this prints the exact original message.
    print(f"Accuracy promedio en {n_splits}-Fold:", mean_score)

    # Returned so callers can reuse the number; ignoring it is harmless.
    return mean_score


In [5]:
# Leave-One-Out (LOO)
def leave_one_out(data, k):
    """Print the mean leave-one-out accuracy of a KNN classifier.

    Parameters
    ----------
    data : object
        Anything exposing ``.data`` (feature matrix) and ``.target``
        (class labels); both attributes are read directly.
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.

    Returns
    -------
    None
        The averaged score is only printed.
    """
    features, labels = data.data, data.target

    # One score per split produced by LeaveOneOut; their mean is reported.
    per_split_scores = cross_val_score(
        KNeighborsClassifier(n_neighbors=k),
        features,
        labels,
        cv=LeaveOneOut(),
    )

    print("Accuracy promedio en LOO:", np.mean(per_split_scores))


In [6]:
# Selection of the best value of K
def find_best_k(data, method="cross_val", k_values=range(1, 21)):
    """Grid-search the number of neighbours for a KNN classifier.

    Parameters
    ----------
    data : object
        Anything exposing ``.data`` (feature matrix) and ``.target``
        (class labels); both attributes are read directly.
    method : {"cross_val", "hold_out"}, default "cross_val"
        ``"cross_val"`` scores each K by the mean of a shuffled, stratified
        10-fold cross-validation; ``"hold_out"`` scores it on the test part
        of a stratified 70/30 split. Both use seed 42.
    k_values : iterable of int, default ``range(1, 21)``
        Candidate values of K, tried in the given order.

    Returns
    -------
    int
        The candidate with the highest score. Ties keep the earliest
        candidate because the comparison is strict.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names, or ``k_values``
        is empty.

    Notes
    -----
    The printed score is the one used to pick K, so it is an optimistic
    estimate of performance on unseen data.
    """
    # Validate up front: previously an unknown method left `score` unbound
    # and crashed with UnboundLocalError inside the loop.
    if method not in ("cross_val", "hold_out"):
        raise ValueError(
            f"Unknown method {method!r}; expected 'cross_val' or 'hold_out'"
        )
    candidates = list(k_values)
    if not candidates:
        raise ValueError("k_values must contain at least one candidate K")

    X, y = data.data, data.target

    # The splitter / split use a fixed integer seed, so building them once
    # yields the same partitions as rebuilding them for every K.
    if method == "cross_val":
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

        def evaluate(k):
            knn = KNeighborsClassifier(n_neighbors=k)
            return np.mean(cross_val_score(knn, X, y, cv=skf))
    else:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, stratify=y, random_state=42
        )

        def evaluate(k):
            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            return knn.score(X_test, y_test)

    # Start from -inf so the first candidate always wins; starting from 0
    # could return the invalid K=0 when no score exceeded 0.
    best_k, best_score = None, -np.inf
    for k in candidates:
        score = evaluate(k)
        if score > best_score:
            best_k, best_score = k, score

    print(f"Mejor valor de K: {best_k} con Accuracy: {best_score}")
    return best_k


In [7]:
# For every dataset: pick K with 10-fold CV, then report that K under the
# three validation schemes, in the order hold-out -> 10-fold -> leave-one-out.
# NOTE(review): K is chosen on the same seeded 10-fold split that is reported
# below, so the 10-fold figure equals the selection score and is optimistic.
# NOTE(review): features are not standardised before KNN; confirm whether
# that is intended.
for name, data in datasets.items():
    print(f"\n--- Dataset: {name} ---")

    # Search for the best K using cross-validation
    print("\nBuscando mejor K con 10-Fold Cross Validation:")
    best_k = find_best_k(data, method="cross_val")

    # (heading, validation routine) pairs, executed in this order
    for heading, validate in (
        ("\nValidación Hold-Out 70/30 Estratificado:", hold_out_validation),
        ("\nValidación 10-Fold Cross-Validation Estratificado:", cross_validation),
        ("\nValidación Leave-One-Out:", leave_one_out),
    ):
        print(heading)
        validate(data, best_k)



--- Dataset: Iris ---

Buscando mejor K con 10-Fold Cross Validation:
Mejor valor de K: 17 con Accuracy: 0.9800000000000001

Validación Hold-Out 70/30 Estratificado:
Accuracy: 0.9555555555555556
Matriz de Confusión:
 [[15  0  0]
 [ 0 14  1]
 [ 0  1 14]]

Validación 10-Fold Cross-Validation Estratificado:
Accuracy promedio en 10-Fold: 0.9800000000000001

Validación Leave-One-Out:
Accuracy promedio en LOO: 0.9733333333333334

--- Dataset: Wine ---

Buscando mejor K con 10-Fold Cross Validation:
Mejor valor de K: 1 con Accuracy: 0.7300653594771241

Validación Hold-Out 70/30 Estratificado:
Accuracy: 0.7037037037037037
Matriz de Confusión:
 [[14  3  1]
 [ 1 15  5]
 [ 1  5  9]]

Validación 10-Fold Cross-Validation Estratificado:
Accuracy promedio en 10-Fold: 0.7300653594771241

Validación Leave-One-Out:
Accuracy promedio en LOO: 0.7696629213483146

--- Dataset: Breast Cancer ---

Buscando mejor K con 10-Fold Cross Validation:


found 0 physical cores < 1
  File "C:\Users\cap2268\AppData\Roaming\Python\Python312\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


Mejor valor de K: 12 con Accuracy: 0.9385025062656641

Validación Hold-Out 70/30 Estratificado:
Accuracy: 0.935672514619883
Matriz de Confusión:
 [[ 57   7]
 [  4 103]]

Validación 10-Fold Cross-Validation Estratificado:
Accuracy promedio en 10-Fold: 0.9385025062656641

Validación Leave-One-Out:
Accuracy promedio en LOO: 0.9367311072056239
