In [1]:
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn import metrics
import os
from glob import glob

class GaussianNaiveBayes:
    def __init__(self, epsilon=1e-3):
        self.classes = None
        self.mean = None
        self.var = None
        self.priors = None
        self.epsilon = epsilon

    def fit(self, X, y):
        self.classes = np.unique(y)
        n_classes = len(self.classes)
        n_features = X.shape[1]
        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0) + self.epsilon
            self.priors[idx] = X_c.shape[0] / float(X.shape[0])

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        posteriors = []
        for idx, c in enumerate(self.classes):
            prior = np.log(self.priors[idx])
            class_conditional = np.sum(np.log(self._pdf(idx, x)))
            posterior = prior + class_conditional
            posteriors.append(posterior)
        return self.classes[np.argmax(posteriors)]

    def _pdf(self, class_idx, x):
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(- (x - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator


def extract_hog_features(image_path):
    try:
        image = imread(image_path, as_gray=True)
        image_resized = resize(image, (128, 64))
        hog_features = hog(image_resized, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys')
        return hog_features
    except Exception as e:
        print(f"No se pudo procesar la imagen {image_path}: {e}")
        return None

features = []
labels = []

# Diccionario para asociar carpetas con etiquetas
label_dict = {
    'glass_tiles': 0,
    'hairs': 1,
    'flowers': 2,
    'oil_painting': 3,
    'jelly_beans': 4,
    'paints': 5,
    'photo_and_face': 6,
    'pompoms': 7,
    'sponges': 8,
    'stuffed_toys': 9
}

# Procesar cada subcarpeta y extraer características
base_path = 'complete_ms_data'  # Cambiar a tu ruta base
for folder_name, label in label_dict.items():
    folder_path = os.path.join(base_path, folder_name)
    image_files = glob(os.path.join(folder_path, '*.png'))
    for image_path in image_files:
        feature = extract_hog_features(image_path)
        if feature is not None:
            features.append(feature)
            labels.append(label)

# Convertir listas a arrays de numpy
features = np.array(features)
labels = np.array(labels)

# Dividir el conjunto de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.25, stratify=labels)

# Crear y entrenar el clasificador gaussiano ingenuo manualmente
gnb_manual = GaussianNaiveBayes()
gnb_manual.fit(X_train, y_train)

# Realizar predicciones en el conjunto de prueba con el clasificador manual
y_pred_manual = gnb_manual.predict(X_test)

# Calcular el rendimiento del clasificador manual
accuracy_manual = metrics.accuracy_score(y_test, y_pred_manual)
print(f'Accuracy (Manual): {accuracy_manual}')

# Generar y mostrar la matriz de confusión para el clasificador manual
conf_matrix_manual = metrics.confusion_matrix(y_test, y_pred_manual)
print(f'Confusion Matrix (Manual):\n{conf_matrix_manual}')



Accuracy (Manual): 1.0
Confusion Matrix (Manual):
[[8 0 0 0 0 0 0 0 0 0]
 [0 8 0 0 0 0 0 0 0 0]
 [0 0 7 0 0 0 0 0 0 0]
 [0 0 0 8 0 0 0 0 0 0]
 [0 0 0 0 8 0 0 0 0 0]
 [0 0 0 0 0 7 0 0 0 0]
 [0 0 0 0 0 0 8 0 0 0]
 [0 0 0 0 0 0 0 8 0 0]
 [0 0 0 0 0 0 0 0 8 0]
 [0 0 0 0 0 0 0 0 0 8]]


In [2]:
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

class GaussianNaiveBayesImproved:
    def __init__(self, epsilon=1e-10):
        self.classes = None
        self.mean = None
        self.std = None
        self.priors = None
        self.epsilon = epsilon

    def fit(self, X, y):
        self.classes = np.unique(y)
        n_classes = len(self.classes)
        n_features = X.shape[1]
        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.std = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.std[idx, :] = X_c.std(axis=0) + self.epsilon
            self.priors[idx] = X_c.shape[0] / float(X.shape[0])

    def _pdf(self, class_idx, x):
        mean = self.mean[class_idx]
        std = self.std[class_idx]
        numerator = np.exp(- (x - mean) ** 2 / (2 * std ** 2))
        denominator = np.sqrt(2 * np.pi * std ** 2)
        return numerator / denominator

    def _predict(self, x):
        posteriors = []
        for idx, c in enumerate(self.classes):
            prior = np.log(self.priors[idx])
            class_conditional = np.sum(np.log(self._pdf(idx, x)))
            posterior = prior + class_conditional
            posteriors.append(posterior)
        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        return np.array([self._predict(x) for x in X])

# Uso del clasificador mejorado
gnb_improved = GaussianNaiveBayesImproved()
gnb_improved.fit(X_train, y_train)
y_pred_improved = gnb_improved.predict(X_test)

# Evaluación mejorada
accuracy_improved = accuracy_score(y_test, y_pred_improved)
conf_matrix_improved = confusion_matrix(y_test, y_pred_improved)
classification_report_improved = classification_report(y_test, y_pred_improved)

print(f'Accuracy (Improved): {accuracy_improved}')
print(f'Confusion Matrix (Improved):\n{conf_matrix_improved}')
print(f'Classification Report (Improved):\n{classification_report_improved}')

Accuracy (Improved): 0.8589743589743589
Confusion Matrix (Improved):
[[8 0 0 0 0 0 0 0 0 0]
 [2 6 0 0 0 0 0 0 0 0]
 [1 0 6 0 0 0 0 0 0 0]
 [0 0 0 8 0 0 0 0 0 0]
 [0 0 0 0 8 0 0 0 0 0]
 [2 0 0 0 0 5 0 0 0 0]
 [2 0 0 0 0 0 6 0 0 0]
 [0 0 0 0 0 0 0 8 0 0]
 [0 0 0 0 0 0 0 0 8 0]
 [4 0 0 0 0 0 0 0 0 4]]
Classification Report (Improved):
              precision    recall  f1-score   support

           0       0.42      1.00      0.59         8
           1       1.00      0.75      0.86         8
           2       1.00      0.86      0.92         7
           3       1.00      1.00      1.00         8
           4       1.00      1.00      1.00         8
           5       1.00      0.71      0.83         7
           6       1.00      0.75      0.86         8
           7       1.00      1.00      1.00         8
           8       1.00      1.00      1.00         8
           9       1.00      0.50      0.67         8

    accuracy                           0.86        78
   macro avg    

  class_conditional = np.sum(np.log(self._pdf(idx, x)))
