## 1. Prepare data

In [114]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

In [122]:
X, y = datasets.make_blobs(n_samples=100, n_features=2, cluster_std=1.05, random_state=2)
y = [1 if i > 0 else 0 for i in y]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

## 2. Class

In [123]:
class NaiveBayes:
  
  def fit(self, X, y):
    """ Train the weights"""

    self.classes = np.unique(y)
    self.priors = self._get_priors(y)
    self.means, self.variances = self._get_mean_variance(X, y)
  
  def _get_priors(self, y):
    self.priors = []
    n_records = len(y)
    
    for index, c_i in enumerate(self.classes):
      prior = y.count(c_i)/n_records
      self.priors.append(prior)
    return self.priors

  
  def _get_mean_variance(self, X, y):
    n_features = len(X[0])
    n_classes = len(self.classes)
    
    self.variances = np.zeros((n_classes,n_features))
    self.means = np.zeros((n_classes,n_features))
    
    rows = []
    
    for index, c in enumerate(self.classes):
      rows = X[y==c]
      self.means[index, :] = rows.mean(axis=0)
      self.variances[index, :] = rows.var(axis=0)
    
    return self.means, self.variances

  def predict(self, X):
    for x_i in X:
      prediction = self._get_prediction(x_i)
    return prediction
  
  def _get_prediction(self, x_i):
    posteriors = []

    for index, c in enumerate(self.classes):
      prior = np.sum(np.log(self._class_conditional(index, x_i)))
      posterior = np.log(self.priors[index])
      posterior = prior + posterior
      posteriors.append(posterior)
    return self.classes[np.argmax(posteriors)]
    

  
  def _class_conditional(self, index, x_i):
    mean = self.means[index]
    variance = self.variances[index]
    n = np.exp(-(x_i-mean)**2 / (2 * variance))
    d = np.sqrt(2 * np.pi * variance)
    return n / d


In [124]:
obj = NaiveBayes()
obj.fit(X_train, y_train)
predictions = obj.predict(X_test)
print(predictions)


1


In [125]:
X_test.shape

(20, 2)

## 2. Functions

In [None]:
def fit(X, y):
  classes = np.unique(y)

  priors = _get_priors(y, classes)
  means, variances = _get_mean_variance(X, y, classes)
  return means, variances


In [None]:
def _get_priors(y, classes):
  priors = []
  n_records = len(y)

  for c_i in classes:
    prior = y.count(c_i)/n_records

    priors.append(prior)
  
  return priors

In [None]:
def _get_mean_variance(X, y, classes):
  n_features = len(X[0])
  n_classes = len(classes)

  variances = np.zeros((n_classes,n_features))
  means = np.zeros((n_classes,n_features))

  rows = []
  for index, c in enumerate(classes):
    rows = X[y==c]
    means[index, :] = rows.mean(axis=0)
    variances[index, :] = rows.var(axis=0)
  
  return means, variances


  

In [None]:
fit(X, y)

(array([[-1.30860503, -9.54029732],
        [-0.64969351, -2.03280863]]), array([[0.39738545, 1.66873761],
        [1.63616732, 1.58572566]]))