# Naive Bayes from Scratch

Based on [this tutorial](http://kenzotakahashi.github.io/naive-bayes-from-scratch-in-python.html)

## Preparing Data

In [1]:
import pandas as pd

csv = pd.read_csv('../datasetall.csv', usecols=(0, 1))

csv

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


FileNotFoundError: [Errno 2] No such file or directory: '../datasetall.csv'

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
  csv['text'],
  csv['label'],
  test_size=0.2,
  random_state=0,
  stratify=csv['label']
)

In [None]:
X_train

686      Happy Friday PILI pinas espsa mga kaUniteam!Tw...
23048    *binay's ad plays on tv nanay: *whispers pwet ...
28045    samin may best in english grade 5. tapos pag b...
12627    Di ko alam pero everytime makita ko muka ni ma...
22606    Sarap patayin ng tv pag pinapalabas yung "Baki...
                               ...                        
12067    If you won't vote for Dutertevote for Miriam. ...
5776     Naka sale sa shoppee bili na mga kakampinksThe...
3021     [USERNAME] Hello po[USERNAME] [USERNAME] [USER...
2576     Bakit kaya ang mga pilipino uto-uto, konting s...
7238     he has zero integrity. cant believe people are...
Name: text, Length: 22768, dtype: object

In [None]:
y_train.value_counts()

label
1    11476
0    11292
Name: count, dtype: int64

In [None]:
countvec = CountVectorizer()

transformed_text = countvec.fit_transform(X_train)

In [None]:
print(transformed_text)

  (0, 12426)	1
  (0, 10735)	1
  (0, 28459)	1
  (0, 28743)	1
  (0, 9683)	1
  (0, 21866)	1
  (0, 16523)	1
  (0, 37039)	1
  (0, 27778)	1
  (0, 32916)	1
  (0, 36289)	1
  (0, 34255)	1
  (0, 2440)	1
  (0, 7415)	1
  (0, 18054)	1
  (0, 36275)	1
  (0, 21554)	1
  (1, 3845)	1
  (1, 849)	1
  (1, 29055)	1
  (1, 26055)	1
  (1, 36999)	1
  (1, 24223)	1
  (1, 38573)	1
  (1, 30468)	1
  :	:
  (22766, 22736)	2
  (22766, 28492)	1
  (22766, 11035)	1
  (22766, 32027)	1
  (22766, 11054)	1
  (22766, 24334)	1
  (22766, 37656)	2
  (22766, 17092)	1
  (22766, 22640)	1
  (22767, 2180)	1
  (22767, 20985)	3
  (22767, 12582)	1
  (22767, 12475)	1
  (22767, 10571)	1
  (22767, 33475)	1
  (22767, 19502)	1
  (22767, 12847)	1
  (22767, 8882)	1
  (22767, 28125)	1
  (22767, 34327)	1
  (22767, 5372)	1
  (22767, 3519)	1
  (22767, 38097)	1
  (22767, 39389)	1
  (22767, 14336)	1


In [None]:
transformed_test = countvec.transform(X_test)

In [None]:
print(transformed_test)

  (0, 2846)	1
  (0, 3845)	1
  (0, 6371)	1
  (0, 18042)	1
  (0, 21523)	1
  (0, 22736)	1
  (0, 24091)	1
  (0, 33183)	1
  (1, 1452)	1
  (1, 14553)	1
  (1, 16716)	1
  (1, 16805)	1
  (1, 18096)	2
  (1, 18147)	1
  (1, 24662)	1
  (1, 31545)	1
  (1, 35772)	1
  (1, 36275)	1
  (1, 38358)	1
  (2, 8882)	1
  (2, 19502)	1
  (2, 20985)	3
  (2, 33475)	1
  (3, 7441)	1
  (3, 10571)	1
  :	:
  (5691, 17266)	1
  (5691, 17969)	2
  (5691, 18096)	3
  (5691, 18265)	2
  (5691, 22133)	1
  (5691, 22468)	1
  (5691, 22703)	2
  (5691, 24855)	1
  (5691, 25601)	1
  (5691, 27736)	1
  (5691, 31682)	1
  (5691, 32294)	1
  (5691, 35772)	3
  (5691, 35913)	1
  (5691, 36048)	1
  (5691, 36275)	3
  (5691, 38066)	1
  (5691, 38285)	1
  (5691, 38358)	1
  (5691, 38397)	1
  (5691, 38912)	1
  (5692, 2671)	1
  (5692, 3845)	1
  (5692, 33183)	1
  (5692, 33475)	1


## Multinomial (because it's close to Bernoulli)

In [None]:
import numpy as np
from collections import defaultdict
from scipy.special import logsumexp

class MultinomialNB():
  def __init__(self, alpha=1.0):
    # Smoothing parameter
    # Used to prevent features from being totally 0 when getting feature counts
    # Essentially tells NB that there's an extra count to all features
    # which would otherwise zero things when in the case that a feature
    # when predicting was not learned from fitting
    self.alpha = alpha

  def fit(self, X, y):
    if X.shape[0] != len(y):
      print("ERROR: X and y have different lengths")
      return

    num_of_samples = X.shape[0]

    X = X.toarray()

    # Group X by class
    # Use a default dict for this. This automatically makes a list
    # when it encounters a key that is unknown. So we can iterate over
    # each X and y, and append to the list of each respective class
    grouped = defaultdict(list)

    for X_sample, y_sample in zip(X, y):
      grouped[y_sample].append(X_sample)

    # STEP 1: Getting prior log probability of each class
    # Essentially, the (log) probability of class being 0 or 1 based on train set
    # Log scale is used to prevent underflow due to lack of floating point precision
    self.class_log_prior_ = [
      np.log(len(grouped[data_class]) / num_of_samples)
      for data_class
      in grouped.keys()
    ]
    # Output: array([-0.70126153, -0.68509814])
    # These are log probabilities. To reverse this, we can use np.exp()
    # and the result after np.exp() is
    # Output: array([0.49595924, 0.50404076])
    # We will consistently use these log probabilities elsewhere in calculations

    # STEP 2: Count each word in each class. Add smoothing parameter (self.alpha)
    # Make a list so it's easy to append to, numpy arrays are annoying to make 2D arrays
    # from several 1D arrays
    word_count_per_class = []

    # Iterate over each class (0, then 1)
    for data_class in grouped.keys():
      # Sum the count of every word occurrence by column, going downward
      # So if the array looks something like
      # [[1 2 3]
      #  [2 3 4]
      #  [3 4 5]]
      # sum(axis=0) will turn this into
      # [6, 9, 12]
      count_in_class = np.array(grouped[data_class]).sum(axis=0)

      word_count_per_class.append(count_in_class)
    # Make a final array by joining together the 2 lists to make it 2D
    # then we add the alpha value to everything
    word_count_per_class = np.array(word_count_per_class) + self.alpha

    # Reshape to make it a 2D array where the sum corresponds to the row of a class
    # [[244154], -> 0
    #  [246500]] -> 1
    sum_of_words_in_each_class = word_count_per_class.sum(axis=1).reshape(-1, 1)

    # STEP 3: Calculate log probability of each word
    self.feature_log_prob_ = np.log(word_count_per_class / sum_of_words_in_each_class)
  
  def predict_log_proba(self, X):
    X = X.toarray()
    samples = []
    for sample in X:
      feature_log_prob_by_count = self.feature_log_prob_ * sample
      sum_of_feature_probabilities = feature_log_prob_by_count.sum(axis=1)
      feature_probabilities_with_class_prior = sum_of_feature_probabilities + self.class_log_prior_
      samples.append(feature_probabilities_with_class_prior)
    return np.array(samples)
    
  def predict_proba(self, X):
    outputs = self.predict_log_proba(X)
    
    results = []
    for data_class in outputs:
      output_max = data_class.max()
      logsumexp_result = output_max + np.log(np.sum(np.exp(data_class - output_max)))
      results.append(logsumexp_result)
    results = np.array(results).reshape(-1, 1)

    return np.exp(outputs - results)
    # return np.exp(outputs - logsumexp_result.reshape(-1, 1))

  def predict(self, X):
    outputs = self.predict_log_proba(X)

    return np.argmax(outputs, axis=1)


In [None]:
mnb = MultinomialNB()
mnb

<__main__.MultinomialNB at 0x7f6e55d63880>

In [None]:
mnb.fit(transformed_text, y_train)

In [None]:
mnb.predict_proba(transformed_test)

array([[4.16214331e-04, 9.99583786e-01],
       [9.99999977e-01, 2.34820687e-08],
       [4.64053288e-08, 9.99999954e-01],
       ...,
       [4.62296229e-06, 9.99995377e-01],
       [1.00000000e+00, 1.73354161e-18],
       [1.64564609e-02, 9.83543539e-01]])

In [None]:
predictions = mnb.predict(transformed_test)

In [None]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test, predictions)

0.8171438608817847

## Sklearn comparison

In [None]:
from sklearn.naive_bayes import MultinomialNB as SklearnMultinomialNB

# Fit scikit-learn implementation
sklearn_nb = SklearnMultinomialNB()
sklearn_nb.fit(transformed_text, y_train)

# Compare class log priors
print("Custom class log priors:", mnb.class_log_prior_)
print("Sklearn class log priors:", sklearn_nb.class_log_prior_)

# Compare feature log probabilities
print("Custom feature log probabilities:", mnb.feature_log_prob_)
print("Sklearn feature log probabilities:", sklearn_nb.feature_log_prob_)

# Compare predictions
custom_pred = mnb.predict(transformed_test)
sklearn_pred = sklearn_nb.predict(transformed_test)
print("Custom predictions:", custom_pred)
print("Sklearn predictions:", sklearn_pred)

# Compare prediction probabilities
custom_proba = mnb.predict_proba(transformed_test)
sklearn_proba = sklearn_nb.predict_proba(transformed_test)
print("Custom prediction probabilities:", custom_proba)
print("Sklearn prediction probabilities:", sklearn_proba)

# Compare prediction log probabilities
custom_log_proba = mnb.predict_log_proba(transformed_test)
sklearn_log_proba = sklearn_nb.predict_log_proba(transformed_test)
print("Custom prediction log probabilities:", custom_log_proba)
print("Sklearn prediction log probabilities:", sklearn_log_proba)

# Compare accuracy
custom_accuracy = accuracy_score(y_test, custom_pred)
sklearn_accuracy = accuracy_score(y_test, sklearn_pred)
print("Custom accuracy:", custom_accuracy)
print("Sklearn accuracy:", sklearn_accuracy)


Custom class log priors: [-0.7012615309562537, -0.6850981432283781]
Sklearn class log priors: [-0.70126153 -0.68509814]
Custom feature log probabilities: [[-10.32611291 -10.61379498 -11.71240727 ... -12.40555445 -12.40555445
  -12.40555445]
 [-11.72197009 -11.31650498 -12.41511727 ... -11.02882291 -11.72197009
  -11.72197009]]
Sklearn feature log probabilities: [[-10.32611291 -10.61379498 -11.71240727 ... -12.40555445 -12.40555445
  -12.40555445]
 [-11.72197009 -11.31650498 -12.41511727 ... -11.02882291 -11.72197009
  -11.72197009]]
Custom predictions: [1 0 1 ... 1 0 1]
Sklearn predictions: [1 0 1 ... 1 0 1]
Custom prediction probabilities: [[4.16214331e-04 9.99583786e-01]
 [9.99999977e-01 2.34820687e-08]
 [4.64053288e-08 9.99999954e-01]
 ...
 [4.62296229e-06 9.99995377e-01]
 [1.00000000e+00 1.73354161e-18]
 [1.64564609e-02 9.83543539e-01]]
Sklearn prediction probabilities: [[4.16214331e-04 9.99583786e-01]
 [9.99999977e-01 2.34820687e-08]
 [4.64053288e-08 9.99999954e-01]
 ...
 [4.62296

## Bernoulli NB

In [None]:
class BernoulliNB():
  def __init__(self, alpha=1.0):
    # Smoothing parameter
    # Used to prevent features from being totally 0 when getting feature counts
    # Essentially tells NB that there's an extra count to all features
    # which would otherwise zero things when in the case that a feature
    # when predicting was not learned from fitting
    self.alpha = alpha

  def fit(self, X, y):
    if X.shape[0] != len(y):
      print("ERROR: X and y have different lengths")
      return

    num_of_samples = X.shape[0]

    X = X.toarray()

    # Turn counts into binary values
    # NOTE: This is the assumption of Bernoulli NB
    X = np.where(X > 0, 1, 0)

    # Group X by class
    # Use a default dict for this. This automatically makes a list
    # when it encounters a key that is unknown. So we can iterate over
    # each X and y, and append to the list of each respective class
    grouped = defaultdict(list)

    for X_sample, y_sample in zip(X, y):
      grouped[y_sample].append(X_sample)

    # STEP 1: Getting prior log probability of each class
    # Essentially, the (log) probability of class being 0 or 1 based on train set
    # Log scale is used to prevent underflow due to lack of floating point precision
    # Equation: Number of samples in class / number of overall samples
    # If there are 10 samples, 4 are 0 and 6 are 1
    # The equation then looks like
    # np.log(4/10) for 0
    # np.log(6/10) for 1
    self.class_log_prior_ = [
      np.log(len(grouped[data_class]) / num_of_samples)
      for data_class
      in grouped.keys()
    ]
    # Output: array([-0.70126153, -0.68509814])
    # These are log probabilities. To reverse this, we can use np.exp()
    # and the result after np.exp() is
    # Output: array([0.49595924, 0.50404076])
    # This is the same as:
    # 11292 / (11476 + 11292)
    # 11476 / (11476 + 11292)
    # We will consistently use these log probabilities elsewhere in calculations

    # STEP 2: Count each word in each class. Add smoothing parameter (self.alpha)
    # The smoothing parameter is important to prevent divison by 0
    # Make a list so it's easy to append to, numpy arrays are annoying to make 2D arrays
    # from several 1D arrays
    word_count_per_class = []

    # Iterate over each class (0, then 1)
    for data_class in grouped.keys():
      # Sum the count of every word occurrence by column, going downward
      # So if the array looks something like
      # [[1 2 3]
      #  [2 3 4]
      #  [3 4 5]]
      # sum(axis=0) will turn this into
      # [6, 9, 12]
      count_in_class = np.array(grouped[data_class]).sum(axis=0)

      word_count_per_class.append(count_in_class)
    # Make a final array by joining together the 2 lists to make it 2D
    # then we add the alpha value to everything
    # This alpha is important to prevent division by 0. You can observe this
    # by setting the alpha to 0. You will get a lot of nan values
    word_count_per_class = np.array(word_count_per_class) + self.alpha

    # Reshape to make it a 2D array where the sum corresponds to the row of a class
    # [[223218], -> 0
    #  [225122]] -> 1
    sum_of_words_in_each_class = word_count_per_class.sum(axis=1).reshape(-1, 1)

    # STEP 3: Calculate log probability of each word
    # NOTE: Not actually used in BNB
    self.feature_log_prob_ = np.log(word_count_per_class / sum_of_words_in_each_class)

    # STEP 4: BNB
    # Smoothing parameter for classes
    # Multiplied by 2 because of 2 classes, 0 and 1
    smoothing = 2 * self.alpha
    
    # Add the smoothing to number of documents
    num_of_documents_with_smoothing = np.array([
      len(grouped[data_class]) + smoothing
      for data_class 
      in grouped.keys()
    ])

    # STEP 5: Feature Probabilities
    # (Word count in class [0 or 1] + alpha) / (value count [11292, 11476] + alpha * 2)
    # Given an alpha of 1,
    # If you plug in 1.77085178e-04 * 11294, you get 1.999~. This means the original word
    # appeared once (2 - alpha = 1) in the 0 class
    self.feature_prob_ = word_count_per_class / num_of_documents_with_smoothing.reshape(-1, 1)

    return self
  
  def predict_log_proba(self, X):
    """
    This function gets the numerator part of Bernoulli NB
    Read carefully
    """
    X = X.toarray()
    # Turn counts into binary values
    X = np.where(X > 0, 1, 0)

    samples = []
    for sample in X:
      # This part is solving for the numerator
      # For each sample of data,
      # Sum the log probabilities of all the features that appear in the class
      # and add that to the log probabilities that the features do not appear in the class
      # then add once again to the log probability of the class (this is the class prior)
      negative_prob = 1 - self.feature_prob_

      # Invert from 0 to 1, or 1 to 0
      inverted_sample = np.abs(sample - 1)

      # We multiply feature prob to sample to essentially
      # zero out all features that do not appear
      # This leaves us with the features that appear as the only nonzero values
      # so we can simply add them; this is the same as
      # multiplying all the probabilities of all the features
      # but we can just add because we use the log scale
      # The inverted probabilities are based on the Bernoulli rule
      # P(x_i∣y) = P(i∣y)x_i + (1 − P(i∣y)) * (1 − x_i)
      # sample probabilities = P(i∣y)x_i
      # and the inverted probabilities represent the 
      # (1 − P(i∣y)) * (1 − x_i)
      # portion
      sample_probabilities = np.log(self.feature_prob_) * sample
      inverted_probabilities = np.log(negative_prob) * inverted_sample

      log_sum_of_probabilities = (sample_probabilities + inverted_probabilities).sum(axis=1)
      feature_probabilities_with_class_prior = log_sum_of_probabilities + self.class_log_prior_
      samples.append(feature_probabilities_with_class_prior)
    return np.array(samples)
    
  def predict_proba(self, X):
    # Get the numerator part
    outputs = self.predict_log_proba(X)
    
    results = []
    for data_class in outputs:
      # The following two lines are a manual implementation
      # of the logsumexp function
      # The main purpose is to prevent numerical underflow
      # in the denominator due to the need for summation in the formula
      # Reference: https://stats.stackexchange.com/questions/105602/example-of-how-the-log-sum-exp-trick-works-in-naive-bayes
      output_max = data_class.max()
      logsumexp_result = output_max + np.log(np.sum(np.exp(data_class - output_max)))

      results.append(logsumexp_result)
    results = np.array(results).reshape(-1, 1)

    return np.exp(outputs - results)

  def predict(self, X):
    outputs = self.predict_log_proba(X)

    return np.argmax(outputs, axis=1)


In [None]:
from sklearn.naive_bayes import BernoulliNB as SklearnBernoulliNB

custom_bnb = BernoulliNB()
custom_bnb.fit(transformed_text, y_train)

# Fit scikit-learn implementation
sklearn_bnb = SklearnBernoulliNB()
sklearn_bnb.fit(transformed_text, y_train)

# Compare class log priors
print("Custom class log priors:", custom_bnb.class_log_prior_)
print("Sklearn class log priors:", sklearn_bnb.class_log_prior_)

# Compare feature log probabilities
print("Custom feature log probabilities:", np.log(custom_bnb.feature_prob_))
print("Sklearn feature log probabilities:", sklearn_bnb.feature_log_prob_)

# Compare predictions
custom_pred = custom_bnb.predict(transformed_test)
sklearn_pred = sklearn_bnb.predict(transformed_test)
print("Custom predictions:", custom_pred)
print("Sklearn predictions:", sklearn_pred)

# Compare prediction probabilities
custom_proba = custom_bnb.predict_proba(transformed_test)
sklearn_proba = sklearn_bnb.predict_proba(transformed_test)
print("Custom prediction probabilities:", custom_proba)
print("Sklearn prediction probabilities:", sklearn_proba)

# Compare prediction log probabilities
custom_log_proba = custom_bnb.predict_log_proba(transformed_test)
sklearn_log_proba = sklearn_bnb.predict_log_proba(transformed_test)
print("Custom prediction log probabilities:", custom_log_proba)
print("Sklearn prediction log probabilities:", sklearn_log_proba)

# Compare accuracy
custom_accuracy = accuracy_score(y_test, custom_pred)
sklearn_accuracy = accuracy_score(y_test, sklearn_pred)
print("Custom accuracy:", custom_accuracy)
print("Sklearn accuracy:", sklearn_accuracy)


Custom class log priors: [-0.7012615309562537, -0.6850981432283781]
Sklearn class log priors: [-0.70126153 -0.68509814]
Custom feature log probabilities: [[-7.38611674 -7.54026742 -8.63887971 ... -9.33202689 -9.33202689
  -9.33202689]
 [-8.65504026 -8.65504026 -9.34818744 ... -7.96189308 -8.65504026
  -8.65504026]]
Sklearn feature log probabilities: [[-7.38611674 -7.54026742 -8.63887971 ... -9.33202689 -9.33202689
  -9.33202689]
 [-8.65504026 -8.65504026 -9.34818744 ... -7.96189308 -8.65504026
  -8.65504026]]
Custom predictions: [1 0 1 ... 1 0 1]
Sklearn predictions: [1 0 1 ... 1 0 1]
Custom prediction probabilities: [[2.11246308e-04 9.99788754e-01]
 [9.99999931e-01 6.87523480e-08]
 [7.01404124e-06 9.99992986e-01]
 ...
 [1.11873689e-05 9.99988813e-01]
 [1.00000000e+00 8.02383436e-14]
 [9.58222418e-03 9.90417776e-01]]
Sklearn prediction probabilities: [[2.11246308e-04 9.99788754e-01]
 [9.99999931e-01 6.87523480e-08]
 [7.01404124e-06 9.99992986e-01]
 ...
 [1.11873689e-05 9.99988813e-01]


## Random tests below

In [None]:
pd.DataFrame(custom_bnb.feature_log_prob_[0][:50])

Unnamed: 0,0
0,-10.369994
1,-10.524145
2,-11.622757
3,-12.315904
4,-11.622757
5,-11.622757
6,-11.622757
7,-11.217292
8,-11.622757
9,-10.706466


In [None]:
pd.DataFrame(sklearn_bnb.feature_log_prob_[0][:50])

Unnamed: 0,0
0,-7.386117
1,-7.540267
2,-8.63888
3,-9.332027
4,-8.63888
5,-8.63888
6,-8.63888
7,-8.233415
8,-8.63888
9,-7.722589


In [None]:
np.log(7.0/223218)

-10.369994002343624

In [None]:
custom_bnb.feature_prob_[0][:50]

array([6.19798123e-04, 5.31255534e-04, 1.77085178e-04, 8.85425890e-05,
       1.77085178e-04, 1.77085178e-04, 1.77085178e-04, 2.65627767e-04,
       1.77085178e-04, 4.42712945e-04, 1.77085178e-04, 1.77085178e-04,
       8.85425890e-05, 3.54170356e-04, 1.77085178e-04, 1.77085178e-04,
       8.85425890e-05, 1.77085178e-04, 1.77085178e-04, 1.77085178e-04,
       1.77085178e-04, 1.77085178e-04, 3.54170356e-04, 1.77085178e-04,
       3.54170356e-04, 1.77085178e-04, 8.85425890e-05, 8.85425890e-05,
       3.09899061e-03, 5.31255534e-04, 8.85425890e-05, 1.77085178e-04,
       8.85425890e-05, 8.85425890e-05, 8.85425890e-05, 8.85425890e-05,
       1.77085178e-04, 8.85425890e-05, 1.77085178e-04, 2.65627767e-04,
       8.85425890e-05, 2.65627767e-04, 8.85425890e-05, 1.77085178e-04,
       1.77085178e-04, 1.77085178e-04, 1.94793696e-03, 1.77085178e-04,
       1.77085178e-04, 1.77085178e-04])

In [None]:
1 - custom_bnb.feature_prob_[0][:50]

array([0.9993802 , 0.99946874, 0.99982291, 0.99991146, 0.99982291,
       0.99982291, 0.99982291, 0.99973437, 0.99982291, 0.99955729,
       0.99982291, 0.99982291, 0.99991146, 0.99964583, 0.99982291,
       0.99982291, 0.99991146, 0.99982291, 0.99982291, 0.99982291,
       0.99982291, 0.99982291, 0.99964583, 0.99982291, 0.99964583,
       0.99982291, 0.99991146, 0.99991146, 0.99690101, 0.99946874,
       0.99991146, 0.99982291, 0.99991146, 0.99991146, 0.99991146,
       0.99991146, 0.99982291, 0.99991146, 0.99982291, 0.99973437,
       0.99991146, 0.99973437, 0.99991146, 0.99982291, 0.99982291,
       0.99982291, 0.99805206, 0.99982291, 0.99982291, 0.99982291])

In [None]:
test_normal_prob = custom_bnb.feature_prob_[0][:10] * np.array([
  0, 1, 1, 0, 1,
  0, 0, 0, 1, 1
])
test_normal_prob

array([0.        , 0.00053126, 0.00017709, 0.        , 0.00017709,
       0.        , 0.        , 0.        , 0.00017709, 0.00044271])

In [None]:
test_inverted_prob = (1 - custom_bnb.feature_prob_[0][:10]) * np.array([
  1, 0, 0, 1, 0,
  1, 1, 1, 0, 0
])
test_inverted_prob

array([0.9993802 , 0.        , 0.        , 0.99991146, 0.        ,
       0.99982291, 0.99982291, 0.99973437, 0.        , 0.        ])

In [None]:
(test_normal_prob + test_inverted_prob)

array([9.99380202e-01, 5.31255534e-04, 1.77085178e-04, 9.99911457e-01,
       1.77085178e-04, 9.99822915e-01, 9.99822915e-01, 9.99734372e-01,
       1.77085178e-04, 4.42712945e-04])

In [None]:
(test_normal_prob + test_inverted_prob).sum()

5.00017708517797

In [None]:
(test_normal_prob + test_inverted_prob).sum()

5.00017708517797

In [None]:
custom_bnb.feature_prob_[1][:10] * np.array([
  0, 1, 1, 0, 1,
  0, 0, 0, 1, 1
])

array([0.00000000e+00, 1.74246384e-04, 8.71231922e-05, 0.00000000e+00,
       8.71231922e-05, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       8.71231922e-05, 8.71231922e-05])

In [None]:
np.exp(-5)

0.006737946999085467