In [27]:
class MultinomialNB:
    """Multinomial naive Bayes classifier for non-negative count features.

    Each class is modelled by a multinomial distribution over the features,
    estimated from the per-class feature totals seen in ``fit``. Prediction
    picks the class with the highest log joint probability.

    Parameters
    ----------
    alpha : float, default 1.0
        Additive (Laplace/Lidstone) smoothing added to every per-class
        feature count before normalising. With ``alpha=0`` a feature that
        never occurs in a class has probability zero, its log is ``-inf``,
        and ``0 * -inf`` turns whole scores into ``nan``; any positive value
        avoids that.

    Attributes (populated by ``fit``)
    ---------------------------------
    classes : sorted unique labels found in ``y``
    class_prior : fraction of training samples in each class
    feature_counts : raw (unsmoothed) feature totals, shape (n_classes, n_features)
    n_features, n_samples : dimensions of the training matrix
    """

    def __init__(self, alpha=1.0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha
        self.classes = None
        self.class_prior = None
        self.feature_counts = None
        self.n_features = None
        self.n_samples = None

    def fit(self, X, y):
        """Estimate class priors and per-class feature totals.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature counts.
        y : array-like of shape (n_samples,)
            Class label of each row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not have one label
            per row of ``X``.
        """
        # Coerce up front so plain lists behave like arrays; with a list,
        # ``y == label`` would be a single bool instead of a row mask.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape != (n_samples,):
            raise ValueError("y must be 1-D with one label per row of X")

        self.classes, class_sizes = np.unique(y, return_counts=True)
        self.class_prior = class_sizes / n_samples

        # Total occurrences of each feature within each class.
        self.feature_counts = np.zeros((len(self.classes), n_features))
        for i, label in enumerate(self.classes):
            self.feature_counts[i] = X[y == label].sum(axis=0)
        # Kept from the original implementation: echoes the raw counts.
        print(self.feature_counts)

        self.n_features = n_features
        self.n_samples = n_samples

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Entries are taken from ``self.classes``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is not 2-D or its column count differs from training.
        """
        if self.feature_counts is None:
            raise RuntimeError("fit must be called before predict")
        X = np.asarray(X)
        # Explicit check: a single-column X would otherwise broadcast
        # silently against the per-feature log-probabilities.
        if X.ndim != 2 or X.shape[1] != self.n_features:
            raise ValueError(
                "X must be 2-D with %d feature columns" % self.n_features
            )

        # Smoothed log P(feature | class), one row per class.
        smoothed = self.feature_counts + self.alpha
        feature_log_prob = np.log(smoothed) - np.log(
            smoothed.sum(axis=1, keepdims=True)
        )

        # log P(class) + sum_j x_j * log P(feature_j | class) for all
        # samples and classes at once.
        joint_log_likelihood = X @ feature_log_prob.T + np.log(self.class_prior)
        return self.classes[np.argmax(joint_log_likelihood, axis=1)]


In [28]:
# Toy training set: each entry pairs a row of four count features with its class label
training_pairs = [
    ([1, 2, 3, 4], 0),
    ([3, 4, 1, 1], 1),
    ([1, 1, 1, 2], 1),
    ([4, 1, 1, 3], 0),
    ([3, 4, 4, 3], 0),
]
X = np.array([features for features, _ in training_pairs])
y = np.array([label for _, label in training_pairs])

# Build the classifier and train it on the toy data
model = MultinomialNB()
model.fit(X, y)




[[ 8.  7.  8. 10.]
 [ 4.  5.  2.  3.]]


In [21]:
# Classify two previously unseen samples and display the predicted labels
X_new = np.array(
    [
        [3, 3, 2, 1],
        [3, 4, 3, 3],
    ]
)
y_pred = model.predict(X_new)
print(y_pred)

[0, 1]
