## **Import libraries**

In [1]:
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KernelDensity

## Normal Distribution

In [2]:
class NormalDistribution():
    """Normal(Gaussian) Distribution"""
    def __init__(self, features):
        self.mean = np.mean(features)
        self.std = np.std(features)
    def pdf(self, value):
        """Probability density function"""
        return 1/(self.std * np.sqrt(2 * np.pi)) * np.exp( - (value - self.mean)**2 / (2 * self.std**2))
    def logpdf(self, value):
        """Log Probability density function"""
        return np.log(self.pdf(value))

## **Naive Bayes Classifier**

In [3]:
class NaiveBayesClassifier():
    def __init__(self):
      self.values_dict = {}
      self.features_proba_y_dict = {}
      self.probability_y_dict = {}
      
      
    def fit(self, X, y,  distribution = 'normal'):
      self.unique_labels = np.unique(y)
      self.len_features = X.shape[1]
      for clas in self.unique_labels:
        self.values_dict[clas] = X[y == clas]
        # P(A)
        self.probability_y_dict[clas] = self.values_dict[clas].shape[0] / X.shape[0]
        # P(B|A)
        self.features_proba_y_dict[clas] = {}
        if distribution == 'normal':
          for feature in range(self.len_features):
            self.features_proba_y_dict[clas][feature]  = NormalDistribution(self.values_dict[clas].T[feature])
        else:
          for feature in range(self.len_features):
              try:
                self.features_proba_y_dict[clas][feature]  = distribution(self.values_dict[clas].T[feature])
              except:
                print('Please check your distribution function. It have to get NumPy array of features and create probability for all rows.')
      

    def proba_b_a(self, value, clas):
      psevdo_probability = 1
      for feature in range(self.len_features):
        psevdo_probability *= self.features_proba_y_dict[clas][feature].pdf(value[feature])
      return psevdo_probability


    def predict(self, value):
      a = 0
      #end_sum_of_prob = 0
      max_prob = 0 
      # P(A|B)
      for l in self.unique_labels: 
        a += self.proba_b_a(value,l)*self.probability_y_dict[l]
      for l in self.unique_labels: 

        #end_sum_of_prob += self.proba_b_a(value,l)*self.probability_y_dict[l]/a
        if self.proba_b_a(value,l)*self.probability_y_dict[l]/a > max_prob:
          max_prob = self.proba_b_a(value,l)*self.probability_y_dict[l]/a
          end_answer = l
      #print('Check sum probabilities:' , end_sum_of_prob)
      return end_answer
    def accuracy_score(self,features,targets):
      answers = []
      for f in features:
        answers.append(self.predict(f))
      print(np.array(answers))
      print(np.array(targets))
      return (list(answers == targets).count(True) / len(targets))

        



### **Simple test**

In [4]:
# Simple value
X = np.array([[15,17,19],[4,5,6],[7,8,9],[10,11,12],[13,14,15]])
y = np.array([1,2,2,1,1])

print(np.column_stack([X,y.T]))

[[15 17 19  1]
 [ 4  5  6  2]
 [ 7  8  9  2]
 [10 11 12  1]
 [13 14 15  1]]


In [5]:
#Simple train
model = NaiveBayesClassifier()  
model.fit(X,y,distribution = 'normal')

In [6]:
# Simple test
model.predict([5,3,6])

2

### **Iris test**

In [7]:
dataset = datasets.load_iris()

In [8]:
model.fit(dataset['data'], dataset['target'])

In [9]:
model.predict(np.array([6.1, 2.6, 5.6, 1.4]))

2

### **Kernel Density**

In [10]:
class GaussianKDE:
    def __init__(self, feature):
      self.kde = KernelDensity(bandwidth=1.)
      self.kde.fit(feature.reshape((-1, 1)))
    def logpdf(self, value):
      
      return self.kde.score_samples(value.reshape((-1, 1)))

    def pdf(self, value):
      return np.exp(self.logpdf(value))

In [11]:
model.predict(np.array([6.1, 2.6, 5.6, 1.4]))

2

### **Final Tests**

In [24]:
features_train, features_test, target_train, target_test = train_test_split(dataset['data'], dataset['target'], test_size=0.1)

In [25]:
model = NaiveBayesClassifier()  

In [26]:
model.fit(features_train, target_train)

In [27]:
model.accuracy_score(features_test,target_test)

[2 0 0 1 0 2 0 0 1 2 1 0 2 1 1]
[2 0 0 1 0 2 0 0 1 2 1 0 2 1 1]


1.0

In [28]:
model.fit(features_train, target_train, distribution =GaussianKDE)

In [29]:
model.accuracy_score(features_test,target_test)

[2 0 0 1 0 2 0 0 1 2 1 0 2 1 1]
[2 0 0 1 0 2 0 0 1 2 1 0 2 1 1]


1.0