In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
# Short alias for the probability density function of scipy's normal distribution.
pdf = norm.pdf

In [2]:
class Bayes():
    """Gaussian naive Bayes classifier.

    For every class, each feature is modelled as an independent normal
    distribution whose mean and sample standard deviation (``ddof=1``) are
    estimated in ``fit``. The score of a sample for a class is the class
    prior multiplied by the product of the per-feature densities.

    Parameters
    ----------
    priors : dict, optional
        Mapping ``class label -> prior probability``. When omitted (or empty),
        priors are estimated from the class frequencies on every ``fit``.

    Attributes
    ----------
    means_dict, stds_dict : dict
        Per-class arrays holding the mean / standard deviation of each feature.
    priors : dict
        Priors in use (user supplied, or estimated by the latest ``fit``).
    classes : ndarray
        Sorted unique labels seen by the latest ``fit``.
    scores : ndarray or None
        Scores produced by the most recent ``score`` / ``predict`` call.
    """

    def __init__(self, priors=None):
        self.means_dict = {}
        self.stds_dict = {}
        self.priors = priors
        self.classes = []
        self.scores = None
        # The priors dict produced by the latest fit (if any). Compared by
        # identity in fit() to tell estimated priors from user-supplied ones.
        self._estimated_priors = None

    def _create_class_stats_dict(self, x_data, y_data):
        """Store the per-feature mean and sample std of every class in ``self.classes``."""
        for class_ in self.classes:
            class_rows = x_data[y_data == class_]
            self.means_dict[class_] = np.mean(class_rows, axis=0)
            self.stds_dict[class_] = np.std(class_rows, axis=0, ddof=1)

    def fit(self, x_data, y_data):
        """Estimate class statistics (and priors, unless user supplied).

        Parameters
        ----------
        x_data : array-like of shape (n_samples, n_features)
            Numeric training features (ndarray, DataFrame, nested list, ...).
        y_data : array-like of shape (n_samples,)
            Class label of every training row.

        Raises
        ------
        ValueError
            If ``x_data`` is not 2-D or the number of rows and labels differ.
        """
        # Work on plain arrays so the stored statistics are positional ndarrays
        # regardless of whether pandas objects were passed in.
        x_data = np.asarray(x_data, dtype=float)
        y_data = np.asarray(y_data).ravel()
        if x_data.ndim != 2:
            raise ValueError("x_data must be 2-D: (n_samples, n_features)")
        if x_data.shape[0] != y_data.shape[0]:
            raise ValueError("x_data and y_data must contain the same number of samples")

        self.classes, counts = np.unique(y_data, return_counts=True)
        # Start from a clean slate so a refit never keeps classes of an older fit.
        self.means_dict = {}
        self.stds_dict = {}
        self._create_class_stats_dict(x_data, y_data)
        self.scores = np.empty(shape=(0, len(self.classes)))

        # Re-estimate priors when none were supplied, or when the current ones
        # are merely the estimate left behind by a previous fit.
        no_user_priors = self.priors is None or len(self.priors) == 0
        if no_user_priors or self.priors is self._estimated_priors:
            self._estimated_priors = dict(zip(self.classes, counts / counts.sum()))
            self.priors = self._estimated_priors

    def _joint_log_likelihood(self, data):
        """Return ``log(prior) + sum(log density)`` for every (sample, class) pair."""
        if len(self.classes) == 0:
            raise ValueError("Bayes is not fitted; call fit() before scoring")

        n_classes = len(self.classes)
        n_features = len(self.means_dict[self.classes[0]])
        data = np.asarray(data, dtype=float)
        if data.size == 0:
            return np.empty(shape=(0, n_classes))
        if data.ndim != 2 or data.shape[1] != n_features:
            raise ValueError(
                "data must be 2-D with %d feature columns, got shape %s"
                % (n_features, data.shape)
            )

        columns = []
        for class_ in self.classes:
            # A zero prior legitimately maps to -inf; silence the numpy warning.
            with np.errstate(divide="ignore"):
                log_prior = np.log(self.priors[class_])
            log_density = norm.logpdf(
                data, loc=self.means_dict[class_], scale=self.stds_dict[class_]
            )
            columns.append(log_prior + log_density.sum(axis=1))
        return np.column_stack(columns)

    def score(self, data):
        """Return the unnormalised class scores of ``data``.

        Parameters
        ----------
        data : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            ``prior * product of feature densities``; columns follow
            ``self.classes``. Also stored in ``self.scores``, replacing the
            scores of any previous call.

        Raises
        ------
        ValueError
            If the model is not fitted or ``data`` has the wrong shape.
        """
        self.scores = np.exp(self._joint_log_likelihood(data))
        return self.scores

    def predict(self, data):
        """Return the most probable class label for every row of ``data``.

        The arg-max is taken in log space so that samples with many features,
        whose density product underflows to zero, are still ranked correctly.
        ``self.scores`` is refreshed with the scores of ``data`` as a side effect.
        """
        log_scores = self._joint_log_likelihood(data)
        self.scores = np.exp(log_scores)
        return np.asarray(self.classes)[np.argmax(log_scores, axis=1)]
    

In [3]:
import seaborn as sns
# Load the 'iris' example dataset through seaborn and preview its first three rows.
iris = sns.load_dataset('iris')
iris.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [4]:
from sklearn.model_selection import train_test_split

# Separate the target from the features without mutating `iris`: an in-place
# drop would make this cell fail with a KeyError on 'species' when re-run and
# would silently change the frame displayed by the earlier preview cell.
y_iris = iris['species']
X_iris = iris.drop('species', axis=1)

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_iris, y_iris, test_size=.3, random_state=42
)

In [5]:
# Train the hand-written classifier on the training split; no priors are passed to the constructor.
b = Bayes()
b.fit(X_train, y_train)

In [6]:
# Classify the held-out rows with the hand-written model (features passed as a plain NumPy array).
res = b.predict(X_test.values)

In [7]:
# Element-wise check of the hand-written model's predictions against the true test labels.
y_test.values == res

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True], dtype=bool)

In [8]:
from scipy import stats

In [9]:
from sklearn.naive_bayes import GaussianNB

In [10]:
# scikit-learn's Gaussian naive Bayes with default arguments, used below for comparison.
smodel = GaussianNB()

In [11]:
# Fit the scikit-learn model on the same training split as the hand-written classifier.
smodel.fit(X_train, y_train)

GaussianNB(priors=None)

In [13]:
# Element-wise check of scikit-learn's predictions against the true test labels.
smodel.predict(X_test) == y_test.values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True], dtype=bool)