In [17]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from skimage.feature import local_binary_pattern
from skimage import io, color
from math import sqrt, pi, exp

class GaussianNaiveBayes:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.var[c] = np.var(X_c, axis=0)

    def predict(self, X):
        predictions = [self._predict(x) for x in X]
        return np.array(predictions)

    def _predict(self, x):
        posteriors = []

        for c in self.classes:
            prior = 1.0 / len(self.classes)  # P(c) = 1/n_classes
            class_conditional = np.sum(np.log(self._pdf(c, x)))
            posterior = np.log(prior) + class_conditional
            posteriors.append(posterior)

        return self.classes[np.argmax(posteriors)]

    def _pdf(self, class_label, x):
        mean = self.mean[class_label]
        var = self.var[class_label]
        numerator = np.exp(-(x - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * pi * var)
        return numerator / denominator

def extract_features(image_path):
    image = io.imread(image_path)

    # Convertir a escala de grises 
    if len(image.shape) == 2:
        image = color.gray2rgb(image)

    gray_image = color.rgb2gray(image)
    lbp_image = local_binary_pattern(gray_image, P=8, R=1, method='uniform')
    hist, _ = np.histogram(lbp_image.ravel(), bins=np.arange(0, 10), range=[0, 9])
    return hist

def load_dataset(root_dir):
    X, y = [], []

    for class_label, class_name in enumerate(os.listdir(root_dir)):
        class_dir = os.path.join(root_dir, class_name)
        
        for file_name in os.listdir(class_dir):
            file_path = os.path.join(class_dir, file_name)
            features = extract_features(file_path)
            X.append(features)
            y.append(class_label)

    return np.array(X), np.array(y)

# Cargar datos
root_directory = "datos/"
X, y = load_dataset(root_directory)

# Dividir en conjunto de entrenamiento y prueba 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

# Crear y entrenar el modelo
model = GaussianNaiveBayes()
model.fit(X_train, y_train)

# Realizar predicciones 
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Calcular la precisión 
accuracy_train = accuracy_score(y_train, y_train_pred)
accuracy_test = accuracy_score(y_test, y_test_pred)

print(f'Precisión en conjunto de entrenamiento: {accuracy_train:.2f}')
print(f'Precisión en conjunto de prueba: {accuracy_test:.2f}')

# Generar matriz de confusión 
conf_matrix = confusion_matrix(y_test, y_test_pred)
print('Matriz de confusión en conjunto de prueba:')
print(conf_matrix)




Precisión en conjunto de entrenamiento: 0.88
Precisión en conjunto de prueba: 0.85
Matriz de confusión en conjunto de prueba:
[[5 0 0 0 0 0 2 0 0 1]
 [0 8 0 0 0 0 0 0 0 0]
 [0 0 7 0 1 0 0 0 0 0]
 [0 0 0 8 0 0 0 0 0 0]
 [0 0 1 0 7 0 0 0 0 0]
 [0 0 0 0 0 8 0 0 0 0]
 [5 0 0 0 0 0 3 0 0 0]
 [0 0 0 0 0 0 0 8 0 0]
 [0 0 0 0 1 0 0 0 7 0]
 [0 0 0 0 0 0 1 0 0 7]]
