# Parte I. 

Programa el clasificador Naïve Bayes y valídalo con 3 datasets (Iris, Wine y Digits) usando los siguientes métodos de validación:

- Hold-Out 70/30 estratificado.
- 10-Fold Cross-Validation estratificado.
- Leave-One-Out.




Pasos a realizar en la práctica:

Implementar el clasificador Naive Bayes.

Validar el modelo en cada dataset con:
Hold-Out (70/30 estratificado).
10-Fold Cross-Validation estratificado.
Leave-One-Out (LOO).

Analizar los resultados y justificar el valor más adecuado de k (si aplica).

In [1]:
# Importar librerías necesarias
from sklearn.datasets import load_iris, load_wine, load_digits
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.naive_bayes import GaussianNB
import numpy as np

# Naive Bayes evaluation: per-split scoring helper plus the public entry point.
def _fold_accuracies(model, X, y, splits):
    """Return the test accuracy of ``model`` for each (train_idx, test_idx) split.

    ``model.fit`` is called again on every split, so a single estimator
    instance can be shared by the caller.
    """
    scores = []
    for train_idx, test_idx in splits:
        model.fit(X[train_idx], y[train_idx])
        predictions = model.predict(X[test_idx])
        scores.append(accuracy_score(y[test_idx], predictions))
    return scores


def evaluate_naive_bayes(X, y, dataset_name, random_state=42):
    """Evaluate a Gaussian Naive Bayes classifier with three validation schemes.

    The schemes are a stratified 70/30 hold-out split, stratified 10-fold
    cross-validation and leave-one-out. Each accuracy (and the hold-out
    confusion matrix) is printed as it is computed.

    Args:
        X: Feature matrix, one row per sample. Any array-like accepted by
            ``np.asarray`` (ndarray, nested list, DataFrame, ...).
        y: Class labels, one per row of ``X``. Any array-like.
        dataset_name: Label used only in the printed header.
        random_state: Seed for the hold-out split and the shuffled 10-fold
            splitter. Defaults to 42, the value previously hard-coded.

    Returns:
        dict with keys ``'Hold-Out'``, ``'10-Fold CV'`` and
        ``'Leave-One-Out'``; each maps to a dict holding ``"Accuracy"``, and
        ``'Hold-Out'`` additionally holds ``"Confusion Matrix"``.
    """
    # The fold loops select rows with integer index arrays, which plain lists
    # and DataFrames do not support; coerce once up front.
    X = np.asarray(X)
    y = np.asarray(y)

    print(f"\nResultados para el Dataset: {dataset_name}")

    nb = GaussianNB()

    # Stratified 70/30 hold-out
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=random_state
    )
    nb.fit(X_train, y_train)
    y_pred = nb.predict(X_test)
    acc_holdout = accuracy_score(y_test, y_pred)
    cm_holdout = confusion_matrix(y_test, y_pred)
    print(f"Hold-Out Accuracy: {acc_holdout:.4f}")
    print(f"Hold-Out Confusion Matrix:\n{cm_holdout}")

    # Stratified 10-fold cross-validation
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=random_state)
    acc_cv = np.mean(_fold_accuracies(nb, X, y, skf.split(X, y)))
    print(f"10-Fold CV Accuracy: {acc_cv:.4f}")

    # Leave-one-out: every fold tests a single sample, so each per-fold
    # accuracy is 0.0 or 1.0 and their mean is the overall accuracy.
    acc_loo = np.mean(_fold_accuracies(nb, X, y, LeaveOneOut().split(X)))
    print(f"Leave-One-Out Accuracy: {acc_loo:.4f}")

    return {
        'Hold-Out': {"Accuracy": acc_holdout, "Confusion Matrix": cm_holdout},
        '10-Fold CV': {"Accuracy": acc_cv},
        'Leave-One-Out': {"Accuracy": acc_loo},
    }

# Load the three benchmark datasets up front, keyed by display name.
datasets = dict(
    Iris=load_iris(),
    Wine=load_wine(),
    Digits=load_digits(),
)

# Run the full evaluation on each dataset and keep its results by name.
all_results = {}
for name in datasets:
    data = datasets[name]
    X = data.data
    y = data.target
    results = evaluate_naive_bayes(X, y, name)
    all_results[name] = results

# Print a per-dataset summary: one accuracy line per validation method,
# followed by the confusion matrix for the hold-out entry only.
print("\nResumen de Resultados:")
for dataset_name in all_results:
    dataset_results = all_results[dataset_name]
    print(f"\nDataset: {dataset_name}")
    for method in dataset_results:
        metrics = dataset_results[method]
        print(f"{method} -> Accuracy: {metrics['Accuracy']:.4f}")
        if method != 'Hold-Out':
            continue
        print(f"Confusion Matrix:\n{metrics['Confusion Matrix']}")



Resultados para el Dataset: Iris
Hold-Out Accuracy: 0.9111
Hold-Out Confusion Matrix:
[[15  0  0]
 [ 0 14  1]
 [ 0  3 12]]
10-Fold CV Accuracy: 0.9533
Leave-One-Out Accuracy: 0.9533

Resultados para el Dataset: Wine
Hold-Out Accuracy: 1.0000
Hold-Out Confusion Matrix:
[[18  0  0]
 [ 0 21  0]
 [ 0  0 15]]
10-Fold CV Accuracy: 0.9778
Leave-One-Out Accuracy: 0.9775

Resultados para el Dataset: Digits
Hold-Out Accuracy: 0.8222
Hold-Out Confusion Matrix:
[[49  1  0  0  3  1  0  0  0  0]
 [ 0 46  2  0  0  0  1  0  6  0]
 [ 0  7 34  0  1  0  0  0 11  0]
 [ 0  2  1 37  0  1  0  2 11  1]
 [ 0  1  0  0 45  0  1  5  2  0]
 [ 0  1  0  0  0 50  0  2  0  2]
 [ 0  1  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  0  1  0 53  0  0]
 [ 0 11  1  0  0  1  0  1 38  0]
 [ 0  4  1  0  0  0  0  6  4 39]]
10-Fold CV Accuracy: 0.8425
Leave-One-Out Accuracy: 0.8408

Resumen de Resultados:

Dataset: Iris
Hold-Out -> Accuracy: 0.9111
Confusion Matrix:
[[15  0  0]
 [ 0 14  1]
 [ 0  3 12]]
10-Fold CV -> Accuracy: 0.9533
L