In [77]:
import numpy as np

In [79]:
# Reproducible toy data: 6 samples x 100 integer count features drawn from
# {0, ..., 4}, with a distinct class label (1..6) for every sample.
rng = np.random.RandomState(seed=1)
X = rng.randint(0, 5, size=(6, 100))
y = np.arange(1, 7)

In [83]:
from sklearn.naive_bayes import MultinomialNB
# Reference model: scikit-learn's MultinomialNB with default settings,
# fitted on the toy data (X, y) defined above.
multi = MultinomialNB()
multi.fit(X, y)

MultinomialNB()

In [84]:
# Predict the label of the third training row; the slice X[2:3] keeps the
# input two-dimensional (one row), which is what predict() is given here.
print(multi.predict(X[2:3]))

[3]


In [89]:
class MultiNB(object):
    """Minimal multinomial naive Bayes classifier for count features.

    Attributes (all ``None`` until ``fit`` has been called):
        priors: 1-D array of length n_classes with the empirical class
            frequencies P(c).
        params: array of shape (n_features, n_classes); column ``k`` holds
            the smoothed per-feature probabilities P(feature | class k) and
            sums to 1.
        unique_labels: sorted array of the distinct labels seen in ``fit``;
            position ``k`` corresponds to column ``k`` of ``params``.
    """

    def __init__(self):
        self.priors = None
        self.params = None
        self.unique_labels = None

    def fit(self, X, y, alpha=1.0):
        """Estimate class priors and smoothed feature probabilities.

        Args:
            X: array-like of shape (n_samples, n_features) with non-negative
                feature counts.
            y: array-like of length n_samples with the class labels.
            alpha: additive (Lidstone) smoothing parameter in (0.0, 1.0].

        Raises:
            AssertionError: if ``alpha`` is outside (0.0, 1.0].
        """
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not (0.0 < alpha <= 1.0):
            raise AssertionError(
                "ERROR: smoothing parameter alpha should have value within (0.0, 1.0]!"
            )

        # Coerce to arrays so that plain lists work: with a list ``y`` the
        # comparison ``y == label`` would be a scalar instead of a mask.
        X = np.asarray(X)
        y = np.asarray(y)

        n_features = X.shape[1]
        self.unique_labels = np.unique(y)
        n_classes = len(self.unique_labels)
        self.params = np.zeros(shape=(n_features, n_classes))
        self.priors = np.zeros(shape=(n_classes,))

        for ix, label in enumerate(self.unique_labels):
            # Boolean mask selecting the training samples of this class.
            mask = (y == label)
            class_rows = X[mask, :]

            # Add-alpha smoothing: every feature count is raised by alpha and
            # the denominator by n_features * alpha, so each column of
            # ``params`` still sums to 1.
            token_counts_in_label = class_rows.sum(axis=0) + alpha
            total_tokens_in_label = class_rows.sum() + n_features * alpha
            self.params[:, ix] = token_counts_in_label / total_tokens_in_label
            self.priors[ix] = mask.sum() / len(y)

    def predict_log_likelihood(self, X):
        """Return the class-conditional log-likelihood of every sample.

        Args:
            X: array of shape (n_samples, n_features) with feature counts.

        Returns:
            Array of shape (n_samples, n_classes) whose entry (i, k) is
            sum_j X[i, j] * log(params[j, k]). Class priors are NOT included.
        """
        log_params = np.log(self.params)
        return np.dot(X, log_params)

    def predict(self, X):
        """Predict the most probable class label for every sample.

        The decision maximises the log-posterior (up to a constant), i.e.
        log-likelihood plus log-prior, so class imbalance in the training
        data is taken into account.

        Args:
            X: array of shape (n_samples, n_features) with feature counts.

        Returns:
            1-D array of length n_samples with the predicted labels.
        """
        # Every class in ``unique_labels`` occurs at least once in the
        # training labels, so all priors are > 0 and the log is finite.
        log_posteriors = self.predict_log_likelihood(X) + np.log(self.priors)
        best_class = np.argmax(log_posteriors, axis=1)
        return self.unique_labels[best_class]

In [90]:
# Fit the hand-written MultiNB classifier on the same toy data (X, y),
# using the default smoothing parameter alpha=1.0.
like = MultiNB()
like.fit(X,y)

In [91]:
# Classify the third training row with the hand-written model; the slice
# X[2:3] keeps the input two-dimensional (a single row).
like.predict(X[2:3])

array([3])