# In [20]:
from sklearn.datasets import load_iris
import numpy as np
from pprint import pprint
from sklearn.model_selection import train_test_split
import scipy.stats

class NaiveBayes:
    """Gaussian naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and standard deviation are estimated from the
    training data.  Prediction picks the class with the highest posterior,
    evaluated in log space.

    Attributes:
        classes: Sorted array of the distinct labels seen by ``fit``.
        means: Array of shape ``(n_classes, n_features)`` with per-class
            feature means.
        stds: Array of shape ``(n_classes, n_features)`` with per-class
            feature standard deviations, floored at ``min_std``.
        y_prob: Array of shape ``(n_classes,)`` with the class priors
            (relative class frequencies in the training labels).
        n_classes: Number of distinct labels.
        n_features: Number of columns in the training data.
    """

    def __init__(self, min_std=1e-9):
        """Create an unfitted classifier.

        Args:
            min_std: Lower bound applied to every estimated standard
                deviation.  A feature that is constant within a class has
                a standard deviation of zero, which would otherwise make
                the normal density undefined (NaN).
        """
        self.min_std = min_std
        self.classes = None
        self.means = None
        self.stds = None
        self.y_prob = None
        self.n_classes = None
        self.n_features = None

    def fit(self, x, y):
        """Estimate per-class feature statistics and class priors.

        Args:
            x: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of shape ``(n_samples,)`` holding the labels.
                Labels may be any values ``np.unique`` can sort (they do
                not have to be the integers ``0..n_classes-1``).

        Raises:
            ValueError: If ``x`` is not 2-D, if ``y`` does not hold exactly
                one label per row of ``x``, or if the data is empty.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y)
        if x.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (n_samples, n_features)")
        if y.shape != (len(x),):
            raise ValueError("y must be 1-D with one label per row of x")
        if len(x) == 0:
            raise ValueError("cannot fit on empty data")

        # Rows of means/stds/y_prob are indexed by a label's position in
        # ``classes``, not by the label value itself.
        self.classes, counts = np.unique(y, return_counts=True)
        self.n_classes = len(self.classes)
        self.n_features = x.shape[1]

        self.means = np.zeros((self.n_classes, self.n_features))
        self.stds = np.zeros((self.n_classes, self.n_features))

        for k, label in enumerate(self.classes):
            rows = x[y == label]
            self.means[k] = rows.mean(axis=0)
            # Floor the spread so a constant feature keeps a valid density.
            self.stds[k] = np.maximum(rows.std(axis=0), self.min_std)

        self.y_prob = counts / len(y)

    def predict(self, x):
        """Return the most probable label for each row of ``x``.

        Args:
            x: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples,)`` containing entries of
            ``classes``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.classes is None:
            raise RuntimeError("NaiveBayes.predict called before fit")

        x = np.asarray(x, dtype=float)

        # Broadcast (n_samples, 1, n_features) against (n_classes, n_features)
        # and sum log-densities over the features.  A product of raw
        # densities underflows to zero when there are many features.
        log_likelihood = scipy.stats.norm.logpdf(
            x[:, np.newaxis, :], loc=self.means, scale=self.stds
        ).sum(axis=2)
        log_posterior = log_likelihood + np.log(self.y_prob)

        return self.classes[np.argmax(log_posterior, axis=1)]
    
    
# Demo: hold out half of the iris data set, fit the classifier on the other
# half, and report how many held-out samples are misclassified.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,  # fixed seed so the split is reproducible
)

nb = NaiveBayes()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Count the positions where the prediction disagrees with the true label.
n_err = np.sum(y_pred != y_test)
print(n_err)


# Output: 4
