# Clasificador Naive Bayes

In [1]:
# Importar las librerias
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB # Libreria para el clasificador Naive Bayes
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Hold-out validation
def hold_out_validation(data, test_size=0.3, random_state=None):
    """Evaluate a Gaussian Naive Bayes classifier on one random hold-out split.

    The last column of ``data`` is used as the class label and all other
    columns as features. Rows are shuffled without looking at the labels, so
    the split is random but not stratified.

    Args:
        data: DataFrame whose last column holds the class labels.
        test_size: Fraction of the rows reserved for the test set.
        random_state: Optional seed for a private ``np.random.RandomState``.
            When ``None`` the global NumPy random state is used, so an earlier
            ``np.random.seed`` call still controls the split.

    Returns:
        Tuple ``(y_test, y_pred)``: true and predicted labels of the test rows.

    Raises:
        ValueError: If the split would leave the training or test set empty.
    """
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    n = len(X)
    # Keep the fraction and the resulting row count in separate names.
    n_test = int(n * test_size)
    if not 0 < n_test < n:
        raise ValueError(
            f"test_size={test_size!r} yields {n_test} test rows out of {n}; "
            "both the training and the test set must be non-empty"
        )

    # A seeded private generator makes the split reproducible on demand
    # without touching the global random state.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    indices = list(range(n))
    rng.shuffle(indices)

    train_indices = indices[:n - n_test]
    test_indices = indices[n - n_test:]
    X_train, y_train = X[train_indices], y[train_indices]
    X_test, y_test = X[test_indices], y[test_indices]

    # Fit Naive Bayes on the training rows and predict the held-out rows.
    model = GaussianNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    return y_test, y_pred

In [3]:
# K-fold cross-validation
def k_fold_validation(data, k=10):
    """Evaluate a Gaussian Naive Bayes classifier with k-fold cross-validation.

    Rows are divided into ``k`` contiguous folds in their existing order (no
    shuffling, no stratification). Each fold is used once as the test set
    while the remaining folds form the training set. The last column of
    ``data`` is the class label; all other columns are features.

    Args:
        data: DataFrame whose last column holds the class labels.
        k: Number of folds.

    Returns:
        Tuple ``(y_tests, y_preds)``: lists with the true and predicted labels
        of every row, concatenated fold by fold.

    Raises:
        ValueError: If ``k`` is smaller than 2 or larger than the row count.
    """
    n = len(data)
    if not 2 <= k <= n:
        raise ValueError(f"k must be between 2 and the number of rows ({n}), got {k!r}")

    # Split row positions instead of the DataFrame itself: np.array_split on a
    # DataFrame relies on DataFrame.swapaxes, which pandas has deprecated and
    # which emits a FutureWarning. The resulting folds are the same.
    fold_positions = np.array_split(np.arange(n), k)

    y_tests = []
    y_preds = []

    for i, test_positions in enumerate(fold_positions):
        train_positions = np.concatenate(
            [positions for j, positions in enumerate(fold_positions) if j != i]
        )
        train_data = data.iloc[train_positions]
        test_data = data.iloc[test_positions]

        X_train = train_data.iloc[:, :-1].values
        y_train = train_data.iloc[:, -1].values
        X_test = test_data.iloc[:, :-1].values
        y_test = test_data.iloc[:, -1].values

        # Fit Naive Bayes on the training folds and predict the test fold.
        model = GaussianNB()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        y_tests.extend(y_test)
        y_preds.extend(y_pred)

    return y_tests, y_preds

In [4]:
def split_data(data, iteracion):
    """Split ``data`` into one test row and the remaining training rows.

    Both parts are selected by row position, so the function works for any
    index (non-default, non-integer, or with duplicate labels).

    Args:
        data: DataFrame to split.
        iteracion: Position of the row to hold out as the test pattern.

    Returns:
        Tuple ``(train_data, test_data)``: every row except the one at
        position ``iteracion``, and a one-row DataFrame with that row.

    Raises:
        IndexError: If ``iteracion`` is out of bounds for ``data``.
    """
    test_data = data.iloc[[iteracion]]  # single-row DataFrame (the test pattern)
    # Remove the row by position too; dropping by label would target a
    # different row (or raise KeyError) whenever labels differ from positions.
    train_positions = np.delete(np.arange(len(data)), iteracion)
    train_data = data.iloc[train_positions]
    return train_data, test_data

# Leave-one-out validation
def leave_one_out_validation(data):
    """Evaluate a Gaussian Naive Bayes classifier with leave-one-out validation.

    Each row is held out once as the test pattern while a fresh model is
    trained on the rows returned by ``split_data``. The last column of
    ``data`` is the class label; all other columns are features.

    Returns:
        Tuple ``(y_tests, y_preds)``: lists with the true and predicted label
        of every held-out row, in row order.
    """
    true_labels, predicted_labels = [], []

    for position in range(len(data)):
        # One row for testing, the rest for training.
        remaining_rows, held_out_row = split_data(data, position)

        train_features = remaining_rows.iloc[:, :-1].values
        train_labels = remaining_rows.iloc[:, -1].values
        test_features = held_out_row.iloc[:, :-1].values
        test_labels = held_out_row.iloc[:, -1].values

        # Train Naive Bayes on the remaining rows and predict the held-out one.
        classifier = GaussianNB()
        classifier.fit(train_features, train_labels)
        prediction = classifier.predict(test_features)

        true_labels.extend(test_labels)
        predicted_labels.extend(prediction)

    return true_labels, predicted_labels

In [5]:
# Performance evaluation
def evaluate_performance(y_true, y_pred):
    """Print the accuracy and the confusion matrix of a set of predictions.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
    """
    # Compute both metrics before printing anything.
    score, matrix = (
        accuracy_score(y_true, y_pred),
        confusion_matrix(y_true, y_pred),
    )

    print("Accuracy:", score)
    print("Matriz de Confusión:\n", matrix)

In [8]:
# Load each dataset and run every validation scheme on it
datasets = ['raisin.csv', 'glass.csv', 'iris.csv']

for dataset in datasets:
    data = pd.read_csv(dataset)
    print("\n", "*"*50)
    print(f"--- Validación en {dataset} ---")

    # Hold-out 70/30. The label does not say "Estratificado" because
    # hold_out_validation shuffles rows without looking at the class labels.
    y_test, y_pred = hold_out_validation(data)
    print("\nHold-Out (70/30):")
    evaluate_performance(y_test, y_pred)

    # 10-fold cross-validation. Same reason: k_fold_validation splits the rows
    # in their existing order and does not balance classes across folds.
    y_test, y_pred = k_fold_validation(data)
    print("\n10-Fold Cross-Validation:")
    evaluate_performance(y_test, y_pred)

    # Leave-one-out
    y_test, y_pred = leave_one_out_validation(data)
    print("\nLeave-One-Out:")
    evaluate_performance(y_test, y_pred)


 **************************************************
--- Validación en raisin.csv ---

Hold-Out (70/30 Estratificado):
Accuracy: 0.8148148148148148
Matriz de Confusión:
 [[102  42]
 [  8 118]]

10-Fold Cross-Validation Estratificado:
Accuracy: 0.8177777777777778
Matriz de Confusión:
 [[322 128]
 [ 36 414]]


  return bound(*args, **kwds)



Leave-One-Out:
Accuracy: 0.8233333333333334
Matriz de Confusión:
 [[327 123]
 [ 36 414]]

 **************************************************
--- Validación en glass.csv ---

Hold-Out (70/30 Estratificado):
Accuracy: 0.8253968253968254
Matriz de Confusión:
 [[26  2  0  0  0  0]
 [ 7 14  0  0  0  0]
 [ 0  0  3  0  0  0]
 [ 0  1  0  1  0  0]
 [ 0  0  0  0  1  1]
 [ 0  0  0  0  0  7]]

10-Fold Cross-Validation Estratificado:
Accuracy: 0.6807511737089202
Matriz de Confusión:
 [[64  5  0  0  0  0]
 [17 57  1  0  0  1]
 [ 0 10  7  0  0  0]
 [ 0  9  0  3  0  1]
 [ 0  5  0  1  0  3]
 [ 0 15  0  0  0 14]]


  return bound(*args, **kwds)



Leave-One-Out:
Accuracy: 0.812206572769953
Matriz de Confusión:
 [[67  2  0  0  0  0]
 [22 47  6  0  0  1]
 [ 0  2 15  0  0  0]
 [ 0  4  0  8  0  1]
 [ 0  0  0  0  8  1]
 [ 0  1  0  0  0 28]]

 **************************************************
--- Validación en iris.csv ---

Hold-Out (70/30 Estratificado):
Accuracy: 1.0
Matriz de Confusión:
 [[19  0  0]
 [ 0 12  0]
 [ 0  0 13]]

10-Fold Cross-Validation Estratificado:
Accuracy: 0.9463087248322147
Matriz de Confusión:
 [[49  0  0]
 [ 0 46  4]
 [ 0  4 46]]


  return bound(*args, **kwds)



Leave-One-Out:
Accuracy: 0.9530201342281879
Matriz de Confusión:
 [[49  0  0]
 [ 0 47  3]
 [ 0  4 46]]
