In [7]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from collections import Counter
from matplotlib.colors import ListedColormap
cmap = ListedColormap(['#FF0000','#00FF00','#0000FF'])

In [111]:
class NaiveBayes:
    
    def fit(self,X,y):
        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)
        
        # Initialise mean,variance and priors
        self._mean = np.zeros((n_classes,n_features),dtype=np.float64)
        self._variance = np.zeros((n_classes,n_features),dtype=np.float64)
        self._priors = np.zeros(n_classes,dtype=np.float64)
        
        for c in self._classes:
            # Get array fo each class
            X_c = X[c==y]
            # Get mean for each feature belonging to that class
            self._mean[c,:] = X_c.mean(axis=0)
            # Get variance for each feature belonging to that class
            self._variance[c,:] = X_c.var(axis=0)
            # Get frequency for each class belonging to that class
            self._priors[c] = X_c.shape[0] / float(n_samples)
        
    def predict(self,X):
        y_pred = [self._predict(x) for x in X]
        return y_pred
    
    def _predict(self,x):
        print(x)
        posteriors = []
        for index,c in enumerate(self._classes):
            priors = np.log(self._priors[index])
            print("PDF:",self._pdf(index,x))
            print("Log PDF:",np.log(self._pdf(index,x)))
            class_conditional = np.sum(np.log(self._pdf(index,x)))
            print("Class Conditional of class {}: ".format(c),class_conditional)
            posterior = priors+class_conditional
            print("Posterior of class {}: ".format(c),posterior)
            posteriors.append(posterior)
        print("Argmax: ",np.argmax(posteriors))
        print("Predicted value:",self._classes[np.argmax(posteriors)])
        print("="*100)
        return self._classes[np.argmax(posteriors)]
            
    
    def _pdf(self,class_index,x):
            mean = self._mean[class_index]
            variance = self._variance[class_index]
            numerator = np.exp(-(x-mean)**2/(2*variance))
            denomintor = np.sqrt(2*np.pi*variance)
            return numerator/denomintor

In [112]:
def accuracy(y_true,y_pred):
    return np.sum(y_pred == y_true)/len(y_true)

### Testing Script

In [117]:
X,y = datasets.make_classification(n_samples=1000,n_features=10,n_classes=2,random_state=4)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=69)

In [118]:
nb = NaiveBayes()
nb.fit(X_train,y_train)

In [119]:
ypred = nb.predict(X_test)

[-3.58876154 -0.86466443 -0.74530967 -2.03079272 -2.44961548 -1.01172433
 -1.3661581  -0.24068597 -0.67335081  0.23837037]
PDF: [0.07279342 0.29237526 0.29000911 0.06170398 0.06285547 0.36836052
 0.39150738 0.38692573 0.3152469  0.40198562]
Log PDF: [-2.62012974 -1.22971715 -1.23784295 -2.7854068  -2.76691726 -0.99869315
 -0.93775092 -0.94952252 -1.15439913 -0.91133896]
Class Conditional of class 0:  -15.591718580592243
Posterior of class 0:  -16.27491543029902
PDF: [0.00463928 0.26868182 0.29579813 0.04498393 0.06786885 0.00154633
 0.00059633 0.4030835  0.31511399 0.38016201]
Log PDF: [-5.37319674 -1.31422743 -1.21807804 -3.10145004 -2.6901781  -6.47186941
 -7.42471351 -0.90861155 -1.15482082 -0.96715778]
Class Conditional of class 1:  -30.624303427986153
Posterior of class 1:  -31.3275009443996
Argmax:  0
Predicted value: 0
[ 1.20258135  0.88133713  0.0414488   0.05183746 -0.39758072  0.62684261
  1.86406593 -0.37038669 -0.55572568 -1.58823696]
PDF: [0.11390603 0.25533659 0.38636339 

Posterior of class 1:  -23.594128300029595
Argmax:  1
Predicted value: 1
[ 0.8669409  -0.47736094 -0.17130171  0.08890635 -0.51382008  0.50556061
  1.60603478 -1.84254762 -0.59967617 -0.04133615]
PDF: [0.1381395  0.37408562 0.3785989  0.38134071 0.27510927 0.1863649
 0.00931192 0.07332639 0.33168911 0.40326445]
Log PDF: [-1.97949121 -0.98327059 -0.97127796 -0.96406206 -1.29058693 -1.68004871
 -4.67646036 -2.61283465 -1.10355715 -0.90816273]
Class Conditional of class 0:  -17.16975234423685
Posterior of class 0:  -17.85294919394363
PDF: [0.25603102 0.34749341 0.38745348 0.41913123 0.25321308 0.97674526
 0.36842811 0.07623661 0.33150143 0.39307796]
Log PDF: [-1.36245668 -1.05700959 -0.9481595  -0.86957122 -1.37352392 -0.0235294
 -0.99850968 -2.57391348 -1.10412316 -0.93374731]
Class Conditional of class 1:  -11.244543928663294
Posterior of class 1:  -11.94774144507674
Argmax:  1
Predicted value: 1
[ 0.27961166 -0.4843685   0.07614127  0.22099078  1.17667938 -0.15404258
 -1.03135362  0.82

Posterior of class 1:  -12.675307277841087
Argmax:  1
Predicted value: 1
[ 0.7684797   0.42213619  0.26605012  0.55550359  1.15672391  0.0673143
 -0.43708833  0.49481779  0.72229707  0.03501304]
PDF: [0.14524758 0.36092905 0.37696139 0.32500534 0.19744989 0.52875454
 0.36341941 0.35256653 0.31641678 0.40617985]
Log PDF: [-1.92931552 -1.01907387 -0.97561251 -1.12391366 -1.62227047 -0.63723096
 -1.01219771 -1.04251594 -1.150695   -0.90095925]
Class Conditional of class 0:  -11.41378488031605
Posterior of class 0:  -12.096981730022828
PDF: [0.25597817 0.36191247 0.38331516 0.34804516 0.19951321 0.77585086
 0.05246557 0.34893816 0.31616546 0.39245693]
Log PDF: [-1.36266313 -1.01635289 -0.95889776 -1.05542304 -1.61187482 -0.25379497
 -2.94759817 -1.05286056 -1.15148961 -0.93532848]
Class Conditional of class 1:  -12.346283428843545
Posterior of class 1:  -13.049480945256992
Argmax:  0
Predicted value: 0
[ 1.34016964 -0.93057753 -2.07946254  1.07503908  1.42466242  0.25736809
 -0.07832251  0

Posterior of class 0:  -13.564756838164195
PDF: [1.41389887e-01 3.14320412e-01 2.44148378e-01 1.42690605e-01
 2.42041757e-01 5.84374374e-02 2.86109440e-05 1.99163651e-01
 2.99937333e-01 3.62484474e-01]
Log PDF: [ -1.95623405  -1.15734239  -1.40997913  -1.9470766   -1.41864502
  -2.83979854 -10.46172126  -1.61362843  -1.20418172  -1.01477364]
Class Conditional of class 1:  -25.023380764873867
Posterior of class 1:  -25.726578281287313
Argmax:  0
Predicted value: 0
[ 2.27047626  2.13793974  0.13407977  0.47640815  0.82683373  0.81085374
  1.69871324 -0.90329951 -0.1927779   0.74704504]
PDF: [0.04925    0.03190056 0.38465663 0.33872265 0.23719001 0.0600479
 0.00703554 0.26516299 0.39675126 0.32446755]
Log PDF: [-3.010846   -3.44513184 -0.95540422 -1.08257364 -1.43889372 -2.81261265
 -4.95678044 -1.32741057 -0.92444576 -1.12556974]
Class Conditional of class 0:  -21.079668586065214
Posterior of class 0:  -21.762865435771992
PDF: [0.16629364 0.04517906 0.39221666 0.36541558 0.23133316 0.523

 -1.24473587 -2.83284116 -0.90160265 -0.99099773]
Class Conditional of class 0:  -14.07338877307884
Posterior of class 0:  -14.756585622785616
PDF: [1.39050154e-02 3.63917728e-01 3.77890498e-01 2.71861403e-01
 1.69914362e-01 2.33136115e-03 3.39377113e-05 6.08028542e-02
 4.05414106e-01 3.42728966e-01]
Log PDF: [ -4.27550568  -1.01082746  -0.97315081  -1.30246289  -1.77246072
  -6.061303   -10.29098373  -2.80011855  -0.90284625  -1.07081533]
Class Conditional of class 1:  -30.460474432903055
Posterior of class 1:  -31.1636719493165
Argmax:  0
Predicted value: 0
[-1.45900507  1.09733927  0.3539112  -1.20521467 -2.31302967 -0.10018174
  0.96478229  2.01611309  1.81251111 -1.39837604]
PDF: [0.20781723 0.20096916 0.36857003 0.20606716 0.07447799 0.65692409
 0.04938078 0.05256093 0.07753075 0.12908392]
Log PDF: [-1.5710963  -1.6046038  -0.99812456 -1.57955314 -2.59725166 -0.42018681
 -3.00819406 -2.94578213 -2.55708065 -2.04729251]
Class Conditional of class 0:  -19.329165616072476
Posterior 

In [67]:
acc = accuracy(y_test,ypred)

In [57]:
acc

0.9