# Multinomial Naive Bayes Classifier
- This notebook implements the Multinomial Naive Bayes classifier from scratch using NumPy and evaluates it on a subset of the 20 Newsgroups text dataset.

## Importing Libraries

In [14]:
# arrays
import numpy as np

# dataset
from sklearn.datasets import fetch_20newsgroups

# data preparation
from sklearn.feature_extraction.text import (CountVectorizer,
                                             TfidfTransformer)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

## Custom Class

In [2]:
class MultinomialNB:
  """
  Multinomial Naive Bayes classifier implemented from scratch with NumPy.

  Attributes set by `fit`:
  ------------------------

  classes_: ndarray of shape (K,)
            Sorted unique labels found in `y`
  priors_: ndarray of shape (K,)
           Fraction of training samples belonging to each class
  params_: ndarray of shape (K, p)
           Smoothed feature probabilities; row i belongs to classes_[i]
  """

  def __init__(self, alpha=1.0):
    """
    Parameters:
    -----------

    alpha: float
           Used for performing Laplace smoothing and training a robust model
    """
    self.alpha = alpha


  def _get_params(self, X, y):
    """
    This method will return the estimated priors and parameters for feature w.r.t. each class

    Parameters:
    -----------

    X: 2-D array or sparse matrix of shape (n, p) holding non-negative feature values
    y: array-like of length n whose values all appear in `self.classes_`

    Returns:
    --------

    (priors, params): priors has shape (K,), params has shape (K, p);
                      entry/row i corresponds to self.classes_[i]
    """
    p = X.shape[1]
    K = len(self.classes_)

    # Map every label to its position in the sorted `classes_` array so that
    # arbitrary labels (e.g. 3 and 7, or strings) index rows 0..K-1 correctly.
    y_idx = np.searchsorted(self.classes_, np.asarray(y))

    priors = np.bincount(y_idx, minlength=K) / len(y_idx)
    params = np.empty((K, p))

    for idx in range(K):
      subset = X[y_idx == idx, :]
      # Sparse matrices return a (1, p) np.matrix here; flatten to a plain vector.
      feature_totals = np.asarray(subset.sum(axis=0)).ravel()
      class_total = subset.sum()
      params[idx, :] = (feature_totals + self.alpha) / (class_total + (self.alpha * p))

    return (priors, params)


  def _get_probabilities(self, X):
    """
    This method will return the normalised class posteriors for each row of X

    The result has one row per sample and one column per entry of `classes_`.
    """
    log_posteriors = np.asarray((X @ np.log(self.params_).T) + np.log(self.priors_))
    # Shift each row by its maximum before exponentiating. The shift cancels
    # in the normalisation below, but it keeps at least one term equal to
    # exp(0) = 1 so the row sum can never underflow to zero (0/0 -> NaN).
    shifted = log_posteriors - log_posteriors.max(axis=1, keepdims=True)
    probs = np.exp(shifted)
    return probs / probs.sum(axis=1, keepdims=True)


  def fit(self, X, y=None):
    """
    This method will train a Multinomial Naive Bayes Classifier model

    Raises:
    -------

    ValueError: if `y` is not provided (labels are required for training)
    """
    if y is None:
      raise ValueError("y must be provided to fit a MultinomialNB model")

    y = np.asarray(y)
    self.classes_ = np.unique(y)
    self.priors_, self.params_ = self._get_params(X, y)
    print("Successfully Trained the Multinomial Naive Bayes Classifier model\n")
    return self


  def predict_proba(self, X):
    """
    This method will return the predicted probabilities
    """
    return self._get_probabilities(X)


  def predict(self, X):
    """
    This method will return the predicted labels
    """
    # argmax yields a column index; translate it back to the original label.
    best = np.argmax(self.predict_proba(X), axis=1)
    return self.classes_[best]


  def score(self, X, y):
    """
    This method will return the accuracy of the model on the given data
    """
    y_pred = self.predict(X)
    return (np.asarray(y) == y_pred).mean()

## Getting the Data

In [25]:
# Restrict the corpus to four newsgroups
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]

# Options shared by the train and test fetches
fetch_options = dict(categories=categories, shuffle=True, random_state=42)
twenty_train = fetch_20newsgroups(subset='train', **fetch_options)
twenty_test = fetch_20newsgroups(subset='test', **fetch_options)

# Bag-of-words counts followed by TF-IDF re-weighting
data_preprocessor = Pipeline(steps=[
    ("bow", CountVectorizer()),
    ("tfidf", TfidfTransformer()),
])

# The preprocessor is fitted on the training documents only and then reused
# unchanged on the test documents.
X_train, y_train = data_preprocessor.fit_transform(twenty_train.data), twenty_train.target
X_test, y_test = data_preprocessor.transform(twenty_test.data), twenty_test.target

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(2257, 35788) (2257,)
(1502, 35788) (1502,)


## Training the Model

In [26]:
# Train the from-scratch classifier with its default smoothing (alpha=1.0).
mnb = MultinomialNB()
# fit() returns the estimator itself, so this bare last expression also displays its repr.
mnb.fit(X_train, y_train)

Successfully Trained the Multinomial Naive Bayes Classifier model



<__main__.MultinomialNB at 0x7ccb7fc53eb0>

In [27]:
# Unique class labels collected from y_train during fit
mnb.classes_

array([0, 1, 2, 3])

In [28]:
# Estimated class priors: the fraction of training samples in each class
mnb.priors_

array([0.21267169, 0.25875055, 0.26318121, 0.26539654])

In [29]:
# Laplace-smoothed feature probabilities estimated for each class
mnb.params_

array([[2.73023013e-05, 6.61239253e-05, 2.46389994e-05, ...,
        2.46389994e-05, 2.46389994e-05, 2.46389994e-05],
       [5.54996132e-05, 4.11211535e-05, 2.48068577e-05, ...,
        2.48068577e-05, 2.74254204e-05, 2.48068577e-05],
       [4.79849790e-05, 4.34078758e-05, 2.44266889e-05, ...,
        2.93324910e-05, 2.41558496e-05, 2.69548278e-05],
       [7.95384151e-05, 2.71251183e-05, 2.38023817e-05, ...,
        2.38023817e-05, 2.38023817e-05, 2.38023817e-05]])

In [30]:
# (number of classes, number of feature columns in X_train)
mnb.params_.shape

(4, 35788)

## Evaluating the Model

In [31]:
# Accuracy on the same data the model was fitted on
mnb.score(X_train, y_train)

0.9636685866194062

In [32]:
# Accuracy on the separate test subset, which was not used for fitting
mnb.score(X_test, y_test)

0.8348868175765646

## Final Remarks:
- Multinomial NB classifier is mostly used with text data and document classification problems
- It can be used for binary and multi-class classification problems directly
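- Its parameters are estimated with `Maximum Likelihood Estimation`, adjusted by Laplace (additive) smoothing (`alpha`) so that features unseen in a class never receive zero probability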