In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm
from sklearn import datasets

In [17]:
df=datasets.load_iris()   
data=df["data"]
data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [19]:
labels=df["target"]
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [20]:
x_train,x_test,y_train,y_test=train_test_split(data,labels,test_size=0.33)
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((100, 4), (50, 4), (100,), (50,))

In [28]:
class GaussianNaiveBayes():
    
    def __init__(self):
        pass
    
    def prior_probability(self,y_train,label):
        
        c=Counter(y_train)
        return c[label]/y_train.shape[0]
    
    def conditional_probability(self,x_train,y_train,label,feature_no,feature_value):
        
        x_train_filtered=x_train[y_train==label]
        col=x_train_filtered[:,feature_no]
        mean=np.mean(col)
        std=np.std(col)
        
        power=-((feature_value-mean)/(std*(2**0.5)))**2
        
        return (1/(std*((2*np.pi)**0.5)))*(np.e**power)
        
    
    def likelihood(self,x_train,y_train,label,x):                              #x is a single query point
        
        pl=1
        for j in range(x_train.shape[1]):
            
            cp=self.conditional_probability(x_train,y_train,label,j,x[j])
            pl*=cp
        
        return pl
            
    def predict(self,x_train,y_train,x_test):
        
        labels=np.unique(y_train)
        predictions=[]
        
        pp={}
        for label in labels:
            prior_prob=self.prior_probability(y_train,label)
            pp[label]=prior_prob
        
        for x in tqdm(x_test):
            
            max_prob=0
            prob_label=-1
            
            for label in labels:
                likelihood_prob=self.likelihood(x_train,y_train,label,x)
                post_prob=likelihood_prob*pp[label]
                
                if post_prob>max_prob:
                    prob_label=label
                    max_prob=post_prob
                
            predictions.append(prob_label)
            
        return predictions

In [29]:
clf=GaussianNaiveBayes()
clf

<__main__.NaiveBayes at 0x264a4ad3be0>

In [31]:
predictions=clf.predict(x_train,y_train,x_test)
print(predictions)

100%|█████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 737.13it/s]


[0, 2, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 2, 2, 0, 2, 0, 2, 1, 2, 1, 0, 2, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 2, 1, 2, 2, 2, 1, 0, 2, 2, 0, 1, 0, 2, 2, 0]


In [35]:
(Counter(predictions-y_test)[0])/y_test.shape[0]

0.98