In [1]:
import numpy as np

class NaiveBayes:
    """
    Naive Bayes classifier for categorical (integer-coded) features.

    Every feature is modelled, per class, as an independent categorical
    distribution estimated with add-one (Laplace) smoothing. Each entry of X
    is used as a *category index*, not as an occurrence count, so this is the
    categorical variant of Naive Bayes rather than the multinomial one.

    Attributes set by ``fit``:
        classes : numpy.ndarray
            Sorted unique labels found in ``y``.
        class_priors : numpy.ndarray, shape (n_classes,)
            Fraction of training samples belonging to each class.
        feature_likelihoods : numpy.ndarray, shape (n_classes, n_features, n_categories)
            Smoothed P(feature == value | class). ``n_categories`` is
            ``max(X) + 1`` and is shared by all features.
    """

    @staticmethod
    def _as_category_matrix(X):
        """Return X as a 2-D integer array; integral floats/bools are cast, anything else is rejected."""
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got shape {X.shape}"
            )
        if np.issubdtype(X.dtype, np.integer):
            return X
        if X.dtype == np.bool_ or np.issubdtype(X.dtype, np.floating):
            # NaN/inf produce garbage when cast; the equality check below rejects them.
            with np.errstate(invalid="ignore"):
                as_int = X.astype(np.int64)
            if np.array_equal(as_int, X):
                return as_int
        raise TypeError("X must contain integer category codes")

    def fit(self, X, y):
        """
        Estimate class priors and per-feature category likelihoods.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training features. Every entry must be a non-negative integer
            category code (integral-valued floats are accepted and cast).
        y : array-like of shape (n_samples,)
            Target labels; any values that ``numpy.unique`` can sort.

        Raises:
        -------
        ValueError
            If X is not 2-D, is empty, or contains negative codes.
        TypeError
            If X contains non-integral values.
        """
        X = self._as_category_matrix(X)
        # A plain list would make ``y == c`` a scalar instead of a row mask.
        y = np.asarray(y)
        n_samples, n_features = X.shape
        if X.size == 0:
            raise ValueError("X must contain at least one sample and one feature")
        if X.min() < 0:
            raise ValueError("X must contain non-negative category codes")

        # Loop-invariant: one shared category count for every feature.
        n_categories = int(X.max()) + 1

        self.classes = np.unique(y)
        n_classes = len(self.classes)

        self.class_priors = np.zeros(n_classes)
        self.feature_likelihoods = np.zeros((n_classes, n_features, n_categories))

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            n_c = X_c.shape[0]
            self.class_priors[idx] = n_c / n_samples
            for f in range(n_features):
                counts = np.bincount(X_c[:, f], minlength=n_categories)
                # Add-one smoothing: each of the n_categories values gets one pseudo-observation.
                self.feature_likelihoods[idx, f, :] = (counts + 1) / (n_c + n_categories)

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Test features, encoded with the same category codes used in ``fit``.

        Returns:
        --------
        numpy.ndarray
            Predicted class label for each row of X (ties go to the first class
            in ``self.classes``).

        Raises:
        -------
        IndexError
            If any value lies outside ``[0, n_categories)``. Negative codes are
            rejected explicitly because NumPy indexing would otherwise wrap
            them around to a different category without any error.
        """
        X = self._as_category_matrix(X)
        n_categories = self.feature_likelihoods.shape[2]
        if np.any((X < 0) | (X >= n_categories)):
            raise IndexError(
                f"feature values must lie in [0, {n_categories}); "
                "found a category that was not covered during fit"
            )

        # Take logs once rather than once per class and feature.
        log_priors = np.log(self.class_priors)
        log_likelihoods = np.log(self.feature_likelihoods)

        # Start every row at the log priors, then add one feature at a time.
        log_probs = np.tile(log_priors, (X.shape[0], 1))
        for f in range(X.shape[1]):
            # (n_classes, n_samples) lookup, transposed to match log_probs.
            log_probs += log_likelihoods[:, f, :][:, X[:, f]].T

        return self.classes[np.argmax(log_probs, axis=1)]

Predictions: [0 1 0]


> ### Example usage:

In [2]:
# Six training samples with two integer-coded categorical features each;
# the labels are binary (0 or 1).
X_train = np.array([[1, 0], [1, 1], [0, 1], [0, 0], [2, 1], [2, 0]])
y_train = np.array([0, 0, 1, 1, 0, 1])

# Three samples to classify, using the same category codes as the training set.
X_test = np.array([[1, 1], [0, 0], [2, 1]])

# Fit on the toy data, then report the predicted label of each test row.
nb = NaiveBayes()
nb.fit(X_train, y_train)
predictions = nb.predict(X_test)
print("Predictions:", predictions)

Predictions: [0 1 0]
