In [172]:
import numpy as np
from math import pi
from scipy.special import logsumexp

In [214]:
class MyGaussianNB:
    def __init__(self):
        self.n_classes = None
        self._statistics = {}

    def fit(self, X:np.ndarray=None, y:np.array=None):
        if len(y) != X.shape[0]:
            raise ValueError('X and y should have the same dimension')

        if y is None:
            raise ValueError('y is None')

        len_y = len(y)
        classes = np.unique(y)
        self.n_classes = len(classes)

        for c in classes:
            indexes = (y == c).astype(int)
            fit_data_c = X[indexes,:]
            self._statistics[c] = {
                'means': np.mean(fit_data_c, axis=0),
                'variance': np.var(fit_data_c, axis=0),
                'prior_prob': np.sum(indexes, axis=0) / len_y
            }

    @staticmethod
    def likelihood(X: np.ndarray, m: np.array, v: np.array):
        """
        Calculates likelihood of Gaussian distibution
        """
        coef = 1 / np.sqrt(2 * pi * v)
        return coef * np.exp(-(X - m) ** 2 / (2 * v))

    def predict_proba(self, X: np.ndarray=None):
        probas = []
        for x in X:
            class_prob = []
            for c in self._statistics.values():
                log_feature_likelihoods = np.log(MyGaussianNB.likelihood(x, c['means'], c['variance']))
                prob = np.sum(log_feature_likelihoods, axis=0) + np.log(c['prior_prob'])
                class_prob.append(prob)
            class_prob -= logsumexp(class_prob)
            probas.append(np.exp(class_prob))
        return probas


    def predict(self, X:np.array=None):
        return np.argmax(self.predict_proba(X), axis=1)

In [207]:
X = np.random.normal(size=(100, 10))
y = np.random.randint(2, size=100)

In [228]:
clf = MyGaussianNB()
clf.fit(X, y)
my_probas = clf.predict_proba(X)
my_pred = clf.predict(X)

In [220]:
from sklearn.naive_bayes import GaussianNB

In [227]:
clf_1 = GaussianNB()
clf_1.fit(X, y)
sklearn_probas = clf_1.predict_proba(X)
sklearn_pred = clf_1.predict(X)

In [229]:
from sklearn.metrics import f1_score
print(f1_score(y, my_pred), f1_score(y, sklearn_pred))

0.45652173913043476 0.6972477064220184
