In [2]:
import numpy as np

In [23]:
def get_model(matrix, labels, Laplace=False):
    """
    Estimate the Naive Bayes parameters (the phis) from a count matrix.

    Args:
        matrix: 2-D array of counts; entry M_ij is the number of times
            group j appeared in sample i.
        labels: Binary label (0 or 1) of each sample, one per row of
            matrix. Any array-like is accepted.
        Laplace: If true, apply Laplace (add-one) smoothing to the
            per-group probabilities.

    Returns:
        A tuple (phi_y, phi_k_y0, phi_k_y1) where phi_y is the fraction of
        samples labelled 1, and phi_k_y0 / phi_k_y1 are arrays holding the
        probability of each group given y=0 / y=1.
    """
    # A plain list would make `labels == 1` a scalar False and silently
    # select nothing, so normalise to an array first.
    labels = np.asarray(labels)
    _, n = matrix.shape
    phi_y = np.mean(labels)

    # Per-group occurrence counts within each class.
    counts_y1 = matrix[labels == 1].sum(axis=0)
    counts_y0 = matrix[labels == 0].sum(axis=0)

    if Laplace:
        # Add one pseudo-count per group; the denominator grows by the
        # number of groups so each distribution still sums to one.
        phi_k_y1 = (1 + counts_y1) / (n + counts_y1.sum())
        phi_k_y0 = (1 + counts_y0) / (n + counts_y0.sum())
    else:
        # Unsmoothed maximum-likelihood estimate. A class with no counts
        # yields 0/0 here; use Laplace=True to avoid that.
        phi_k_y1 = counts_y1 / counts_y1.sum()
        phi_k_y0 = counts_y0 / counts_y0.sum()
    return phi_y, phi_k_y0, phi_k_y1

In [24]:
def predict_naive_bayes(model, matrix):
    """Use a Naive Bayes model to compute predictions for a target matrix.

    Args:
        model: A trained Naive Bayes model returned by get_model, i.e. the
            tuple (phi_y, phi_k_y0, phi_k_y1)
        matrix: A numpy array containing word counts, one row per sample

    Returns: A numpy boolean array with one entry per row of matrix; True
        where the model predicts y=1. Zero probabilities in the model (which
        can occur without Laplace smoothing) make the logs infinite and may
        produce nan scores.
    """

    # Unpack in the same order get_model returns; swapping the two
    # conditional arrays would invert the likelihood term.
    phi_y, phi_k_y0, phi_k_y1 = model

    # Log-odds of y=1 versus y=0: per-group log-likelihood ratios weighted
    # by the counts, plus the log prior odds.
    log_likelihood_ratio = matrix @ (np.log(phi_k_y1) - np.log(phi_k_y0))
    log_prior_odds = np.log(phi_y / (1 - phi_y))

    return log_likelihood_ratio + log_prior_odds >= 0

In [25]:
def get_indicative_groups_naive_bayes(model, top = 3):
    """Find the groups whose presence points most strongly at class y=1.

    Args:
        model: The Naive Bayes model returned by get_model, i.e. the tuple
            (phi_y, phi_k_y0, phi_k_y1)
        top: How many group indices to return

    Returns: A numpy array with the indices of the `top` groups, ordered by
        increasing ratio phi_k_y0 / phi_k_y1 (most indicative first)
    """

    _prior, given_y0, given_y1 = model

    # A small ratio means the group is far likelier under y=1 than under y=0.
    odds_against = given_y0 / given_y1
    ranking = np.argsort(odds_against)
    return ranking[:top]
