In [31]:
import numpy as np

In [44]:
class GaussianNB:
    
    def fit(self,x_train,y_train):
        
        nSamples , nFeatures = x_train.shape
        self._classes = np.unique(y_train)
        n_classes = len(self._classes)
        
        # calculating mean , var and probability of each class
        self._mean = np.zeros((n_classes,nFeatures),dtype=np.float64)
        self._var = np.zeros((n_classes,nFeatures),dtype=np.float64)
        self._probability = np.zeros(n_classes,dtype=np.float64)
        
        for idx , c in enumerate(self._classes):
            
            x_class = x_train[y_train == c]
            self._mean[idx,:] = x_class.mean(axis=0)
            self._var[idx,:] = x_class.var(axis = 0)
            self._probability[idx] = x_class.shape[0] / nSamples
        
    def predict(self,x_test):
        y_pred = [self._predictHelper(x) for x in x_test]
        return np.array(y_pred)
        
    def _predictHelper(self,x):
        best_prob = []
        for index,c in enumerate(self._classes):
            y_prob = self._probability[index]
            prob = self._pdf(index,x)
            result_prob = np.log(prob) + np.log(y_prob)
            best_prob.append(result_prob)
        return self._classes[np.argmax(best_prob)]
        
    def _pdf(self,cls_index,x):
            
        cls_mean = self._mean[cls_index]
        cls_var = self._var[cls_index]
            
        num = np.exp(-((x-cls_mean)**2)/(2*cls_var))
        deno = np.sqrt(2*np.pi*cls_var)
            
        return np.sum(num/deno)
            

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix

In [2]:
iris=datasets.load_iris()
x=iris.data
y=iris.target
x_train,x_test,y_train,y_test = train_test_split(x,y)

In [57]:
clf =GaussianNB()
clf.fit(x_train,y_train)

In [58]:
y_pred=clf.predict(x_test)

In [59]:
print(confusion_matrix(y_test,y_pred))

[[16  0  0]
 [ 0 13  0]
 [ 0  0  9]]
