In [16]:
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn import datasets
import matplotlib.pylab as plt

In [17]:
# Synthetic two-class dataset: 100 samples with 10 features each; the fixed
# random_state makes the generated data reproducible across runs.
X, y = datasets.make_classification(
    n_samples=100,
    n_features=10,
    n_classes=2,
    random_state=123,
)

In [26]:
class NaiveBayes():
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. Prediction returns the class with the largest log-posterior
    (log-prior plus the sum of per-feature log-likelihoods).

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every per-class variance. This keeps the variances
        strictly positive, so a feature that is constant within a class no
        longer produces a 0/0 (NaN) likelihood.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Class label of each training sample.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)

        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        self.classes = len(self._classes)

        self._mean = np.zeros((self.classes, n_features), dtype=np.float64)
        self._var = np.zeros((self.classes, n_features), dtype=np.float64)
        self._priors = np.zeros(self.classes, dtype=np.float64)

        # Additive variance floor, scaled by the spread of the data. If all
        # features are constant the scale is 0, so fall back to the raw
        # smoothing value to still avoid a zero variance.
        epsilon = self.var_smoothing * (X.var(axis=0).max() if X.size else 0.0)
        if epsilon == 0.0:
            epsilon = self.var_smoothing

        for idx, c in enumerate(self._classes):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0) + epsilon
            self._priors[idx] = X_c.shape[0] / float(n_samples)

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        X = np.asarray(X, dtype=np.float64)
        pred = [self._predict(row) for row in X]
        return np.array(pred)

    def _predict(self, x):
        """Return the label with the highest log-posterior for one sample."""
        posts = [
            np.log(self._priors[idx]) + np.sum(self._log_pdf(idx, x))
            for idx in range(self.classes)
        ]
        return self._classes[np.argmax(posts)]

    def _log_pdf(self, class_idx, x):
        """Per-feature Gaussian log-density of ``x`` under one class.

        Evaluated directly in log space: taking ``log`` of the density would
        underflow to ``-inf`` for samples far from the class mean and make
        all classes indistinguishable.
        """
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        return -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)

    def _pdf(self, class_idx, x):
        """Per-feature Gaussian density of ``x`` under one class."""
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(- (x - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def accuracy(self, y, y_pred):
        """Return the share of matching labels as a string such as ``"90.0%"``."""
        # Coerce to arrays so that plain lists are compared element-wise
        # instead of collapsing to a single bool.
        y = np.asarray(y)
        y_pred = np.asarray(y_pred)
        acc = (np.sum(y == y_pred) / len(y)) * 100
        return str(acc) + "%"
        
        
            
            
        
        
        
        
        
        

In [27]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# Fit the classifier on the training portion only.
naive = NaiveBayes()
naive.fit(X_train, y_train)

In [28]:
# Predict the held-out samples and report the share of correct labels.
y_pred = naive.predict(X_test)
print(naive.accuracy(y_test, y_pred))

90.0%
