## Import the required packages and libraries

In [1]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

import matplotlib.pyplot as plt
import seaborn as sb
from matplotlib import rcParams

import sklearn as sk
from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Render matplotlib figures inline in the cell output (IPython magic, not plain Python).
%matplotlib inline
# Default size (width, height) for every figure created after this cell.
rcParams['figure.figsize'] = 10, 5
# Seaborn theme: white background with grid lines.
sb.set_style('whitegrid')
# Print NumPy floats with 4 decimals and suppress scientific notation for small values.
np.set_printoptions(precision=4, suppress=True)

## Import the data and set it up

In [2]:
# Load the Iris dataset bundled with scikit-learn; its `.data`, `.target` and
# `.feature_names` attributes are used in the next cell to build the DataFrame.
iris = datasets.load_iris()

In [3]:
# One row per flower: the measurement columns named after the dataset's
# feature names, plus the integer species label in a trailing 'class' column.
df = DataFrame(data=iris.data, columns=iris.feature_names).assign(
    **{'class': iris.target}
)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## Splitting the data into input features, target, and train/test sets

In [4]:
# Target vector is the 'class' column; every other column is an input feature.
y = df['class']
X = df.drop(columns='class')

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.25,
    random_state=10,
)

## Implementation of Gaussian Naive Bayes using NumPy

In [5]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the highest
    log-posterior: ``log P(class) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance. This keeps the variance strictly positive
        so a feature that is constant within a class does not cause a
        division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    _classes : ndarray of shape (n_classes,)
        Sorted unique labels seen in ``y``.
    _mean, _var : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed, population) variances.
        Row ``i`` belongs to ``_classes[i]``.
    _priors : ndarray of shape (n_classes,)
        Relative frequency of each class in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training features.
        y : array-like of shape (n_samples,)
            Class labels. Any hashable/sortable labels work (ints that do
            not start at 0, strings, ...).

        Returns
        -------
        None
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # init mean, var, priors
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)

        # Variance floor, scaled to the data so it is negligible for
        # well-behaved features but still prevents a zero denominator.
        epsilon = self.var_smoothing * (X.var(axis=0).max() if X.size else 0.0)
        if not epsilon > 0:
            # All features are constant (or there is no data): fall back to
            # the raw smoothing value so the variance is still positive.
            epsilon = self.var_smoothing

        # Index rows by position in `_classes`, not by the label value itself,
        # so labels need not be the integers 0..n_classes-1.
        for idx, c in enumerate(self._classes):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0) + epsilon
            self._priors[idx] = X_c.shape[0] / float(n_samples)

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, taken from the labels seen during ``fit``.

        Raises
        ------
        ValueError
            If ``X`` is non-empty and not two-dimensional.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.size == 0:
            return np.array([], dtype=self._classes.dtype)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features); "
                "got %d dimension(s)" % X.ndim
            )
        log_posteriors = self._joint_log_likelihood(X)
        # argmax returns the first maximum, so ties resolve to the
        # lowest-sorted class.
        return self._classes[np.argmax(log_posteriors, axis=1)]

    def _joint_log_likelihood(self, X):
        """Return unnormalised log-posteriors, shape (n_samples, n_classes).

        The Gaussian log-density is evaluated directly instead of taking
        ``log`` of the density, so samples far from every class mean do not
        underflow to ``log(0) = -inf``.
        """
        # Broadcast to (n_samples, n_classes, n_features).
        diff = X[:, np.newaxis, :] - self._mean[np.newaxis, :, :]
        log_pdf = -0.5 * (np.log(2.0 * np.pi * self._var) + diff ** 2 / self._var)
        return np.log(self._priors) + log_pdf.sum(axis=2)

    def __predictHelper__(self, X):
        """Predict the label of a single sample (1-D array of features).

        Kept for backward compatibility; ``predict`` is vectorised and no
        longer calls it.
        """
        sample = np.asarray(X, dtype=np.float64).reshape(1, -1)
        return self._classes[np.argmax(self._joint_log_likelihood(sample)[0])]

    def __probability__(self, class_idx, X):
        """Return the per-feature Gaussian density of ``X`` for one class.

        ``class_idx`` is the row position in ``_classes`` / ``_mean`` /
        ``_var``, not the label value. Kept for backward compatibility; the
        values can underflow to 0 for samples far from the class mean.
        """
        X = np.asarray(X, dtype=np.float64)
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(-(X - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

In [6]:
# Train the hand-written classifier on plain NumPy arrays taken from the training split.
gnb = NaiveBayes()
gnb.fit(X_train.to_numpy(), y_train.to_numpy())

In [7]:
# Predict a class label for every row of the held-out test features.
gnb_preds = gnb.predict(X_test.to_numpy())
gnb_preds

array([1, 2, 0, 1, 0, 1, 1, 1, 0, 1, 1, 2, 1, 0, 0, 2, 1, 0, 0, 0, 2, 2,
       2, 0, 1, 0, 1, 1, 1, 2, 1, 1, 2, 2, 2, 0, 2, 2])

In [8]:
# Per-class precision, recall and F1 of the predictions against the true test labels.
print(classification_report(y_test, gnb_preds))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00        12

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38

