# Gaussian Naive Bayes Classifier
- This notebook implements the Gaussian Naive Bayes classification algorithm from scratch using NumPy

## Importing Libraries

In [1]:
# arrays
import numpy as np

# dataset
from sklearn.datasets import make_blobs

# data preparation
from sklearn.model_selection import train_test_split

## Custom Class

In [11]:
class GaussianNB:
  """
  Gaussian Naive Bayes classifier implemented from scratch with NumPy.

  Every feature is modelled, per class, as an independent univariate Gaussian.
  For a sample x and class k the (unnormalised) log posterior is

      log P(k) - 0.5 * sum_j [ log(var_kj) + (x_j - mean_kj)^2 / var_kj ]

  The constant -p/2 * log(2*pi) is omitted because it is identical for every
  class and cancels when the posteriors are normalised.

  Parameters
  ----------
  var_smoothing : float, default=1e-9
      Fraction of the largest feature variance of the training data that is
      added to every per-class variance when evaluating likelihoods. This keeps
      the computation finite when a feature is constant within a class.

  Attributes (set by `fit`)
  -------------------------
  classes_ : sorted unique class labels, shape (K,)
  priors_  : relative frequency of each class, aligned with `classes_`
  means_   : per-class feature means, shape (K, p)
  vars_    : per-class feature variances (maximum-likelihood, ddof=0), shape (K, p)
  epsilon_ : absolute smoothing term derived from `var_smoothing`
  """

  def __init__(self, var_smoothing=1e-9):
    self.var_smoothing = var_smoothing


  def _get_priors(self, y):
    """
    This method will return the estimated priors for each unique class label

    The priors are ordered like `np.unique(y)` (and therefore like `classes_`).
    Counting via `np.unique` instead of `np.bincount` keeps this correct for
    labels that are negative, non-contiguous or not integers at all.
    """
    _, counts = np.unique(y, return_counts=True)
    return counts / len(y)


  def _get_params(self, X, y):
    """
    This method will return the estimated parameters for feature w.r.t. each class

    Returns a tuple `(means, variances)`, both of shape (K, p), where row `i`
    belongs to `self.classes_[i]`.
    """
    n, p = X.shape
    K = len(self.classes_)

    means = np.empty((K, p))
    variances = np.empty((K, p))

    # Index rows by position, not by label value: labels need not be 0..K-1.
    for idx, label in enumerate(self.classes_):
      subset = X[y == label, :]
      means[idx, :] = np.mean(subset, axis=0)
      variances[idx, :] = np.var(subset, axis=0)

    return (means, variances)


  def _get_probabilities(self, X):
    """
    This method will return the posterior probability of every class for each row of X

    The result has shape (n, K); column `i` corresponds to `self.classes_[i]`
    and every row sums to 1.
    """
    X = np.asarray(X, dtype=float)
    n, p = X.shape
    K = len(self.classes_)

    # Smoothed variances; the stored `vars_` stay the raw estimates.
    variances = self.vars_ + getattr(self, "epsilon_", 0.0)

    log_posteriors = np.empty((n, K))

    for idx in range(K):
      mu = self.means_[idx]
      var = variances[idx]

      # The covariance is diagonal, so the quadratic form and the
      # log-determinant reduce to element-wise operations (no inv/det needed).
      quad_form = (((X - mu) ** 2) / var).sum(axis=1)
      log_det = np.log(var).sum()

      log_posteriors[:, idx] = -0.5 * (quad_form + log_det) + np.log(self.priors_[idx])

    # Subtract the row-wise maximum before exponentiating so that at least one
    # entry per row is exp(0) = 1; this avoids 0/0 for points far from all classes.
    log_posteriors -= log_posteriors.max(axis=1, keepdims=True)
    probs = np.exp(log_posteriors)
    probs = probs / probs.sum(axis=1, keepdims=True)

    return probs


  def fit(self, X, y=None):
    """
    This method will train a Gaussian Naive Bayes Classifier model

    Parameters: X of shape (n, p) with numeric features, y of shape (n,) with
    class labels. Returns `self`.
    Raises ValueError if `y` is missing, X is not 2-D, or X and y disagree in length.
    """
    if y is None:
      raise ValueError("y is required to fit a Gaussian Naive Bayes classifier")

    X = np.asarray(X, dtype=float)
    y = np.asarray(y)

    if X.ndim != 2:
      raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    if X.shape[0] != y.shape[0]:
      raise ValueError("X and y must contain the same number of samples")

    self.classes_ = np.unique(y)
    self.priors_ = self._get_priors(y)
    self.means_, self.vars_ = self._get_params(X, y)
    # Scale the smoothing term by the data so it is unit-independent.
    self.epsilon_ = self.var_smoothing * np.var(X, axis=0).max()
    print("Successfully Trained the Gaussian Naive Bayes Classifier model\n")
    return self


  def predict_proba(self, X):
    """
    This method will return the predicted probabilities
    """
    return self._get_probabilities(X)


  def predict(self, X):
    """
    This method will return the predicted labels

    The most probable column index is mapped back through `classes_`, so the
    result contains the original label values.
    """
    return self.classes_[np.argmax(self.predict_proba(X), axis=1)]


  def score(self, X, y):
    """
    This method will return the accuracy of the model on the given data
    """
    y_pred = self.predict(X)
    return (np.asarray(y) == y_pred).mean()

## Getting the Data

In [3]:
# Synthetic data: 500 three-dimensional samples drawn from two Gaussian blobs
# centred at (5, 5, 5) and (10, 10, 10); the fixed seed keeps the run reproducible.
X, y = make_blobs(
  n_samples=500,
  n_features=3,
  centers=np.array([[5, 5, 5], [10, 10, 10]]),
  cluster_std=1.5,
  random_state=42,
)

# Stratified hold-out split so each part keeps the class balance of `y`.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  stratify=y,
  random_state=42,
)

# Shapes of the training and test partitions.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(375, 3) (375,)
(125, 3) (125,)


## Training the Model

In [12]:
# Instantiate the from-scratch classifier and estimate its parameters on the training split.
gnb = GaussianNB()
# fit() returns the estimator itself, so this cell also displays the object's repr.
gnb.fit(X_train, y_train)

Successfully Trained the Gaussian Naive Bayes Classifier model



<__main__.GaussianNB at 0x783d10bff700>

In [13]:
# Unique class labels found in y_train during fit.
gnb.classes_

array([0, 1])

In [14]:
# Estimated prior of each class: its relative frequency in y_train.
gnb.priors_

array([0.49866667, 0.50133333])

In [15]:
# Per-class feature means: one row per class, one column per feature.
gnb.means_

array([[ 5.07781412,  4.83332092,  5.02693375],
       [10.28241684, 10.18374867, 10.24550578]])

In [16]:
# Per-class feature variances: one row per class, one column per feature.
gnb.vars_

array([[1.74817003, 2.28920683, 2.37703923],
       [2.33630778, 2.15789439, 2.31162458]])

In [19]:
# Normalised class probabilities for the first five test samples (one column per class).
gnb.predict_proba(X_test)[:5]

array([[8.44863176e-12, 1.00000000e+00],
       [9.99999366e-01, 6.33673235e-07],
       [9.69626669e-08, 9.99999903e-01],
       [1.80380640e-12, 1.00000000e+00],
       [1.00000000e+00, 2.04837828e-10]])

In [20]:
# Most probable class for the first five test samples.
gnb.predict(X_test)[:5]

array([1, 0, 1, 1, 0])

## Evaluating the Model

In [17]:
# Accuracy on the training split (fraction of correctly predicted labels).
gnb.score(X_train, y_train)

1.0

In [18]:
# Accuracy on the held-out test split (fraction of correctly predicted labels).
gnb.score(X_test, y_test)

1.0

## Final Remarks:
- Gaussian NB classifier is mostly used with real-valued and continuous features
- It can be used for binary and multi-class classification problems directly
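- Its parameters (class priors, per-class feature means and variances) are estimated with `Maximum Likelihood Estimation`; predictions then follow from Bayes' theorem under the assumption that features are conditionally independent given the class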