In [149]:
import numpy as np
import pandas as pd

In [389]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier for binary labels (0 and 1).

    Each feature is modelled as an independent normal distribution per class.
    ``fit`` estimates the class priors and the per-feature mean and standard
    deviation of each class; ``predict`` assigns each sample to the class with
    the larger posterior.

    Parameters
    ----------
    min_sigma : float, default 1e-9
        Lower bound applied to every estimated standard deviation so that a
        feature that is constant within a class does not cause a division by
        zero when predicting.

    Attributes
    ----------
    priors : list of two floats, or None before ``fit``
        Fraction of training labels equal to 0 and to 1, in that order.
    means_0, means_1 : ndarray of shape (n_features,), or None before ``fit``
        Per-feature means for class 0 and class 1.
    sigmas_0, sigmas_1 : ndarray of shape (n_features,), or None before ``fit``
        Per-feature standard deviations for class 0 and class 1.
    """

    def __init__(self, min_sigma=1e-9):
        self.min_sigma = min_sigma
        self.priors = None
        self.means_0 = None
        self.means_1 = None
        self.sigmas_0 = None
        self.sigmas_1 = None

    def _class_stats(self, X_class):
        """Return ``(means, sigmas)`` per feature for the rows of one class."""
        if X_class.shape[0] == 0:
            # No samples for this class: keep NaN placeholders. Its prior is 0,
            # so predict() never selects it.
            placeholder = np.full(X_class.shape[1], np.nan)
            return placeholder, placeholder.copy()
        means = np.mean(X_class, axis=0)
        # Floor the spread so constant features do not divide by zero later.
        sigmas = np.maximum(np.std(X_class, axis=0), self.min_sigma)
        return means, sigmas

    def _log_posterior(self, X, prior, means, sigmas):
        """Return the unnormalised log-posterior of every row of X for one class."""
        if prior <= 0:
            # A class never seen during training can never win the comparison.
            return np.full(X.shape[0], -np.inf)
        z = (X - means) / sigmas
        # Summing log-densities avoids the underflow that multiplying many
        # small densities produces.
        log_likelihood = np.sum(
            -np.log(sigmas * np.sqrt(2 * np.pi)) - 0.5 * z ** 2, axis=1
        )
        return np.log(prior) + log_likelihood

    def fit(self, X, y):
        """Estimate priors and per-class Gaussian parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of length n_samples
            Labels, expected to be 0 or 1. Rows whose label is not 1 are used
            for the class-0 statistics.

        Returns
        -------
        tuple
            ``(priors, means_0, means_1, sigmas_0, sigmas_1)``.

        Raises
        ------
        ValueError
            If X is not 2-D, is empty, or has a different number of rows than y.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.priors = [np.sum(y == 0) / len(y), np.sum(y == 1) / len(y)]

        # Split with a boolean mask so that samples which are legitimately
        # all-zero stay in their class.
        is_class_1 = y == 1
        self.means_0, self.sigmas_0 = self._class_stats(X[~is_class_1])
        self.means_1, self.sigmas_1 = self._class_stats(X[is_class_1])

        return self.priors, self.means_0, self.means_1, self.sigmas_0, self.sigmas_1

    def predict(self, X):
        """Predict a label for every row of X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        ndarray of shape (n_samples, 1)
            Float array holding 0.0 where class 0 has the strictly larger
            posterior and 1.0 otherwise (ties go to class 1).
        """
        X = np.asarray(X, dtype=float)
        log_post_0 = self._log_posterior(X, self.priors[0], self.means_0, self.sigmas_0)
        log_post_1 = self._log_posterior(X, self.priors[1], self.means_1, self.sigmas_1)
        return np.where(log_post_0 > log_post_1, 0.0, 1.0).reshape(-1, 1)
                
                
                

In [390]:
import pandas as pd
from sklearn.datasets import load_iris
# Load the iris feature matrix and label vector (return_X_y=True yields a (data, target) pair).
X, y = load_iris(return_X_y=True)

In [391]:
# Keep only the first 100 labels. NaiveBayes above only distinguishes labels 0 and 1;
# presumably the first 100 iris samples are exactly those two classes — TODO confirm.
y = y[0:100]

In [392]:
# Keep the first 100 feature rows so X stays aligned with the truncated label vector.
X = X[0:100]

In [393]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [394]:
# Sanity check: display the training feature matrix dimensions.
X_train.shape

(75, 4)

In [395]:
# Sanity check: the label vector should have one entry per training row.
y_train.shape

(75,)

In [396]:
# Create an unfitted classifier; its priors, means and sigmas stay None until fit() is called.
clf = NaiveBayes()

In [397]:
# Estimate priors and per-class feature means / standard deviations from the training split.
# fit() returns those estimates, so the notebook echoes them as a tuple.
clf.fit(X_train,y_train)

([0.49333333333333335, 0.5066666666666667],
 array([5.06486486, 3.45135135, 1.47297297, 0.24054054]),
 array([5.92368421, 2.78684211, 4.26578947, 1.33947368]),
 array([0.33467711, 0.37677265, 0.14823037, 0.09432394]),
 array([0.52437141, 0.31553718, 0.48627278, 0.20201547]))

In [410]:
# Display the held-out labels.
y_test

array([0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0])

In [405]:
# predict() returns a column vector of shape (n_samples, 1). Reshape the 1-D labels to the
# same shape so that `==` compares element by element instead of broadcasting to (n, n).
Y_test = y_test.reshape(-1,1)

In [406]:
# Predict labels for the held-out features; the result has shape (n_samples, 1).
pr = clf.predict(X_test)

In [417]:
# Accuracy = fraction of held-out samples whose predicted label equals the true label.
accuracy = np.mean(pr == Y_test)

In [418]:
# Display the accuracy on the held-out split.
accuracy

1.0