In [1]:
import numpy as np
from collections import defaultdict

In [2]:
class NaiveBayesClassifier:
    """
    Naive Bayes classifier for categorical features.

    Probabilities are plain relative frequencies counted from the training
    data. A (feature, value) pair that was never observed together with a
    class during training contributes ``unseen_prob`` instead of zero, so the
    log-likelihood stays finite.

    Attributes set by ``fit``:
        classes: sorted array of the distinct labels (``np.unique(y)``).
        class_priors: mapping ``class -> P(class)``.
        feature_probs: mapping ``class -> {(feature_index, value): P(value | class)}``.
    """

    def __init__(self, unseen_prob=1e-6):
        """
        Create an untrained classifier.
        :param unseen_prob: Probability used in ``predict`` for a feature value
            that was not seen with a given class during ``fit``. Must lie in (0, 1].
        :raises ValueError: If ``unseen_prob`` is outside (0, 1].
        """
        if not 0.0 < unseen_prob <= 1.0:
            raise ValueError("unseen_prob must be in the interval (0, 1]")
        self.unseen_prob = unseen_prob
        self.class_priors = defaultdict(float)
        self.feature_probs = defaultdict(lambda: defaultdict(float))
        self.classes = None

    def fit(self, X, y):
        """
        Train the Naive Bayes classifier.
        :param X: Feature vectors, one row per sample (list of lists or 2-D array).
        :param y: Labels (classes), one per row of X (list or 1-D array).
        """
        # Boolean-mask indexing and `.shape` below need real arrays; plain
        # Python lists would either raise or silently compare as a scalar.
        # For inputs that are already ndarrays this is a no-op.
        X = np.asarray(X)
        y = np.asarray(y)

        # Drop anything learned by a previous fit so stale classes or
        # feature values cannot leak into this model.
        self.class_priors = defaultdict(float)
        self.feature_probs = defaultdict(lambda: defaultdict(float))

        self.classes = np.unique(y)
        total_samples = len(y)

        for c in self.classes:
            class_mask = y == c

            # Class prior P(Class)
            self.class_priors[c] = np.sum(class_mask) / total_samples

            # Conditional probabilities P(Feature = value | Class)
            class_samples = X[class_mask]
            total_class_samples = len(class_samples)
            for feature_index in range(X.shape[1]):
                feature_values, counts = np.unique(
                    class_samples[:, feature_index], return_counts=True
                )
                for value, count in zip(feature_values, counts):
                    self.feature_probs[c][(feature_index, value)] = count / total_class_samples

    def predict(self, X):
        """
        Predict the class for each sample in X.
        :param X: Feature vectors, one row per sample (list of lists or 2-D array).
        :return: NumPy array with one predicted class per sample.
        """
        predictions = []
        for sample in X:
            class_probs = {}
            for c in self.classes:
                likelihoods = self.feature_probs[c]
                # Work in log space: start from the log prior and add one
                # log-likelihood term per feature.
                class_prob = np.log(self.class_priors[c])
                for feature_index, value in enumerate(sample):
                    # Values never seen with this class fall back to unseen_prob
                    class_prob += np.log(
                        likelihoods.get((feature_index, value), self.unseen_prob)
                    )
                class_probs[c] = class_prob
            # Highest log-probability wins; on a tie the class that comes
            # first in self.classes is kept.
            predictions.append(max(class_probs, key=class_probs.get))
        return np.array(predictions)

In [3]:
# Demo: fit the classifier on a small categorical dataset and classify two new samples
if __name__ == "__main__":
    # Training table: each entry pairs four categorical feature values with its label
    labelled_rows = [
        (("sunny", "hot", "high", "weak"), "no"),
        (("sunny", "hot", "high", "strong"), "no"),
        (("overcast", "hot", "high", "weak"), "yes"),
        (("rain", "mild", "high", "weak"), "yes"),
        (("rain", "cool", "normal", "weak"), "yes"),
        (("rain", "cool", "normal", "strong"), "no"),
        (("overcast", "cool", "normal", "strong"), "yes"),
        (("sunny", "mild", "high", "weak"), "no"),
        (("sunny", "cool", "normal", "weak"), "yes"),
        (("rain", "mild", "normal", "weak"), "yes"),
        (("sunny", "mild", "normal", "strong"), "yes"),
        (("overcast", "mild", "high", "strong"), "yes"),
        (("overcast", "hot", "normal", "weak"), "yes"),
        (("rain", "mild", "high", "strong"), "no"),
    ]
    # Split the table into the feature matrix and the label vector
    X = np.array([features for features, _ in labelled_rows])
    y = np.array([label for _, label in labelled_rows])

    # Fit the model on the full table
    nb = NaiveBayesClassifier()
    nb.fit(X, y)

    # Classify two feature combinations that do not appear in the training table
    test_samples = np.array([
        ("sunny", "cool", "high", "strong"),
        ("overcast", "mild", "normal", "weak"),
    ])
    predictions = nb.predict(test_samples)
    print(f"Predictions: {predictions}")

Predictions: ['no' 'yes']
