In [41]:
import numpy as np
from collections import Counter

class NaiveBayes:
  """Gaussian Naive Bayes classifier.

  Each feature is modelled, per class, as an independent normal
  distribution. Prediction picks the class with the largest log-posterior
  (log prior + sum of per-feature log-likelihoods).

  Parameters
  ----------
  var_smoothing : float, default=1e-9
      Fraction of the largest feature variance in the training data that is
      added to every per-class variance. This keeps the variances strictly
      positive so that constant features do not produce 0/0 during scoring.
  """

  def __init__(self,var_smoothing=1e-9):
    self.var_smoothing = var_smoothing

  def fit(self,X,y):
    """Estimate per-class feature means, variances and class priors.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    y : array-like of length n_samples

    Raises
    ------
    ValueError
        If X is not 2-D, is empty, or its row count differs from len(y).
    """
    X = np.asarray(X,dtype=np.float64)
    y = np.asarray(y).ravel()

    if X.ndim != 2:
      raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples,n_features = X.shape
    if n_samples == 0:
      raise ValueError("X must contain at least one sample")
    if y.shape[0] != n_samples:
      raise ValueError("X and y must contain the same number of samples")

    self.classes = np.unique(y)
    n_classes = len(self.classes)

    self._mean = np.zeros((n_classes,n_features),dtype=np.float64)
    self._var = np.zeros((n_classes,n_features),dtype=np.float64)
    self._prior = np.zeros(n_classes,dtype=np.float64)

    # Scale the smoothing term by the largest overall feature variance; fall
    # back to a unit scale when every feature is constant so the term never
    # collapses to zero.
    max_var = X.var(axis=0).max() if n_features else 0.0
    epsilon = self.var_smoothing * (max_var if max_var > 0 else 1.0)

    for index,val in enumerate(self.classes):
      x_col = X[y==val]
      self._mean[index,:] = x_col.mean(axis=0)
      self._var[index,:] = x_col.var(axis=0) + epsilon
      self._prior[index] = x_col.shape[0]/float(n_samples)


  def _conditional_prob(self,x):
    """Return the label of the class with the highest log-posterior for x."""
    posteriors = []

    for index,val in enumerate(self.classes):
      prior = np.log(self._prior[index])
      # Sum log-densities directly rather than log(pdf): the raw density
      # underflows to 0 for samples far from the class mean.
      likelihood = np.sum(self._log_pdf(index,x))
      posteriors.append(prior+likelihood)

    return self.classes[np.argmax(posteriors)]

  def _log_pdf(self,index,x):
    """Return the per-feature Gaussian log-density of x under class `index`."""
    mean = self._mean[index]
    var = self._var[index]

    return -0.5*np.log(2*np.pi*var) - ((x-mean) ** 2)/(2*var)

  def _pdf(self,index,x):
    """Return the per-feature Gaussian density of x under class `index`.

    Not used for prediction (see `_log_pdf`); kept for existing callers.
    """
    mean = self._mean[index]
    var = self._var[index]

    num = np.exp(-((x-mean) ** 2)/ (2*var))
    denom = np.sqrt(2*np.pi*var)
    return num/denom



  def predict(self,X):
    """Predict a class label for every row of X.

    Returns
    -------
    numpy.ndarray
        One predicted label per row of X.
    """
    X = np.asarray(X,dtype=np.float64)
    y_pred = [self._conditional_prob(x) for x in X]
    return np.array(y_pred)






In [52]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np


# Generate a synthetic two-class dataset, hold out 20% of it for testing,
# then fit the classifier on the remainder and predict the held-out rows.
SEED = 1234  # shared by data generation and the split

X, y = datasets.make_classification(
    n_samples=1000, n_features=10, n_classes=2, n_informative=2, random_state=SEED
)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
)

clf = NaiveBayes()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

def accuracy(y_test, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both arguments are converted to NumPy arrays first, so plain Python
    sequences are compared element by element rather than as whole objects.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, same shape as `y_test`.

    Returns
    -------
    numpy.float64
        Share of positions where the two inputs agree.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)
    # Refuse mismatched shapes instead of letting `==` broadcast them.
    if y_test.shape != y_pred.shape:
        raise ValueError("y_test and y_pred must have the same shape")
    return np.mean(y_test == y_pred)

# Score the held-out predictions and print the fraction that match.
acc = accuracy(y_test=y_test, y_pred=predictions)
print(acc)

0.93
