In [None]:
import numpy as np
from sklearn.datasets import load_digits, load_breast_cancer

# Load the digits dataset once, then expose its feature matrix and label vector.
data = load_digits()
X = data.data
y = data.target

In [None]:
from functools import reduce

class Bayes:
  """Naive Bayes classifier for categorical (discrete-valued) features.

  Every distinct value observed in a feature column is treated as its own
  category. P(x_j = v | y) is estimated as the relative frequency of ``v``
  among the training rows of class ``y``; no smoothing is applied.

  Attributes set by ``fit``:
    classes_: sorted array of the distinct labels found in ``y``.
    Prior: dict mapping each class label to its relative frequency P(y).
    dist_x_y: nested dict, ``dist_x_y[label][j][value]`` is
      P(x_j = value | y = label).
  """

  def __init__(self):
    pass

  def fit(self, X, y):
    """Estimate class priors and per-feature conditional frequency tables.

    Args:
      X: 2-D array-like of shape (n_samples, n_features).
      y: 1-D array-like with one class label per row of ``X``.

    Prints a ``done: <fraction>`` progress line after each class is processed.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    classes_ = np.unique(y)

    # Prior P(y): share of training rows carrying each label.
    Prior = {class_: np.mean(y == class_) for class_ in classes_}

    # Conditional P(x_j = v | y): one value -> frequency table per (class, feature).
    dist_x_y = {}
    for position, class_ in enumerate(classes_, start=1):
      rows = X[y == class_]
      tables = {}
      for j in range(X.shape[1]):
        # Count each distinct value once instead of re-counting it for every row.
        values, counts = np.unique(rows[:, j], return_counts=True)
        tables[j] = dict(zip(values, counts / len(rows)))
      dist_x_y[class_] = tables
      # Progress is counted by position, not by label value, so string or
      # non-consecutive labels are reported correctly.
      print("done:", position / len(classes_))

    self.Prior = Prior
    self.dist_x_y = dist_x_y
    self.classes_ = classes_

  def predict(self, x):
    """Return the most probable class label for a single sample.

    Args:
      x: 1-D sequence holding one value per feature.

    Returns:
      The label from ``classes_`` that maximises P(y) * prod_j P(x_j | y).
      A feature value never observed for a class during training gives that
      class a score of 0. If every class scores 0, the first class is returned.
    """
    scores = []
    for class_ in self.classes_:
      tables = self.dist_x_y[class_]
      score = self.Prior[class_]
      for j, value in enumerate(x):
        # Unseen value (or unknown feature index) -> zero likelihood.
        score *= tables.get(j, {}).get(value, 0.0)
        if score == 0:
          break  # the product cannot recover from a zero factor
      scores.append(score)

    # Take the largest posterior score and map its index back to the label.
    return self.classes_[int(np.argmax(scores))]
  

In [None]:
# Train the hand-written Bayes classifier on the X, y currently in scope.
model = Bayes()
model.fit(X=X, y=y)

done: 0.1
done: 0.2
done: 0.3
done: 0.4
done: 0.5
done: 0.6
done: 0.7
done: 0.8
done: 0.9
done: 1.0


In [None]:
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

# One estimator per naive Bayes variant imported above.
model_1, model_2, model_3 = BernoulliNB(), MultinomialNB(), GaussianNB()

# Fit each estimator on the X, y currently in scope.
for model in (model_1, model_2, model_3):
  model.fit(X, y)

# Mean cross-validation score of every estimator on both datasets.
# NOTE: this loop rebinds the notebook-level names data, X, y and model.
for ind, data in enumerate([load_digits(), load_breast_cancer()]):
  X = data.data
  y = data.target
  results = []
  for model in (model_1, model_2, model_3):
    results.append(cross_val_score(model, X, y).mean())
  print(f"{ind + 1} dataset :", results)

1 dataset : [0.8241736304549674, 0.8703497369235531, 0.8069281956050759]
2 dataset : [0.6274181027790716, 0.8963204471355379, 0.9385188635305075]


In [None]:
# Rebind `data` to the breast-cancer dataset and display its feature matrix
# as the cell output.
data = load_breast_cancer()
data.data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])