In [1]:
import warnings
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Eliminar las advertencias.
warnings.simplefilter("ignore", category=UserWarning)

In [2]:
# Función para validación Hold-Out 70/30 estratificado
def hold_out_validation(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
    model = GaussianNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    return accuracy, conf_matrix

In [3]:
# Función para validación 10-Fold Cross-Validation estratificado
def k_fold_validation(X, y):
    skf = StratifiedKFold(n_splits=10)
    accuracies = []
    conf_matrices = []
    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model = GaussianNB()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracies.append(accuracy_score(y_test, y_pred))
        conf_matrices.append(confusion_matrix(y_test, y_pred))
    avg_accuracy = np.mean(accuracies)
    avg_conf_matrix = np.mean(conf_matrices, axis=0)
    return avg_accuracy, avg_conf_matrix

In [4]:
# Función para validación Leave-One-Out Cross-Validation
def loo_validation(X, y):
    loo = LeaveOneOut()
    accuracies = []
    conf_matrices = []
    for train_index, test_index in loo.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model = GaussianNB()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracies.append(accuracy_score(y_test, y_pred))
        conf_matrices.append(confusion_matrix(y_test, y_pred))
    avg_accuracy = np.mean(accuracies)
    avg_conf_matrix = np.mean(conf_matrices, axis=0)
    return avg_accuracy, avg_conf_matrix

In [5]:
# Cargar datasets
datasets = ['bezdekiris.csv', 'breast-cancer-wisconsin-modificado.csv', 'winequality-red-modificado-weka-ajustado.csv']

In [6]:
for dataset in datasets:
    data = pd.read_csv(dataset)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    
    print(f"\n***\tEvaluando dataset: {dataset} \t***")
    
    # Hold-Out 70/30 estratificado
    hold_out_acc, hold_out_conf_matrix = hold_out_validation(X, y)
    print(f"\n* Hold-Out 70/30 Estratificado - Precisión: {hold_out_acc}")
    print(f"\n* Hold-Out 70/30 Estratificado - Matriz de Confusión:\n{hold_out_conf_matrix}")
    
    # 10-Fold Cross-Validation estratificado
    k_fold_acc, k_fold_conf_matrix = k_fold_validation(X, y)
    print(f"\n* 10-Fold Cross-Validation Estratificado - Precisión: {k_fold_acc}")
    print(f"\n* 10-Fold Cross-Validation Estratificado - Matriz de Confusión:\n{k_fold_conf_matrix}")
    
    # Leave-One-Out Cross-Validation
    loo_acc, loo_conf_matrix = loo_validation(X, y)
    print(f"\n* Leave-One-Out Cross-Validation - Precisión: {loo_acc}")
    print(f"\n* Leave-One-Out Cross-Validation - Matriz de Confusión:\n{loo_conf_matrix}")


***	Evaluando dataset: bezdekiris.csv 	***

* Hold-Out 70/30 Estratificado - Precisión: 0.9111111111111111

* Hold-Out 70/30 Estratificado - Matriz de Confusión:
[[15  0  0]
 [ 0 14  1]
 [ 0  3 12]]

* 10-Fold Cross-Validation Estratificado - Precisión: 0.9533333333333334

* 10-Fold Cross-Validation Estratificado - Matriz de Confusión:
[[5.  0.  0. ]
 [0.  4.7 0.3]
 [0.  0.4 4.6]]

* Leave-One-Out Cross-Validation - Precisión: 0.9533333333333334

* Leave-One-Out Cross-Validation - Matriz de Confusión:
[[0 0]
 [0 0]]

***	Evaluando dataset: breast-cancer-wisconsin-modificado.csv 	***

* Hold-Out 70/30 Estratificado - Precisión: 0.8523809523809524

* Hold-Out 70/30 Estratificado - Matriz de Confusión:
[[136   2]
 [ 29  43]]

* 10-Fold Cross-Validation Estratificado - Precisión: 0.8525051759834369

* 10-Fold Cross-Validation Estratificado - Matriz de Confusión:
[[44.   1.8]
 [ 8.5 15.6]]

* Leave-One-Out Cross-Validation - Precisión: 0.8741058655221745

* Leave-One-Out Cross-Validation -