In [None]:
# Naive Bayes Classification

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd

## Importing the dataset

In [None]:
# Read the CSV into a DataFrame; 'iris.csv' is resolved relative to the
# notebook's working directory.
dataset = pd.read_csv(filepath_or_buffer='iris.csv')

## Looking at the first 5 values of the dataset

In [None]:
# Preview the first five rows to sanity-check the column names and values
dataset.head(n=5)

## Displaying Image

In [None]:
%matplotlib inline
img=mpimg.imread('iris_types.jpg')
plt.figure(figsize=(20,40))
plt.axis('off')
plt.imshow(img)

## Splitting the dataset in independent and dependent variables

In [None]:
# Independent variables: the first four columns, selected by position.
# NOTE(review): assumes these are the numeric measurement columns — confirm against iris.csv.
# .to_numpy() always yields a NumPy ndarray, whereas .values can return a
# pandas extension array depending on the column dtype.
X = dataset.iloc[:, :4].to_numpy()
# Dependent variable: the class label stored in the 'species' column.
y = dataset['species'].to_numpy()

## Splitting the dataset into the Training set and Test set 

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=82,
)

## Feature scaling to bring the variables onto a single scale

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits so no test-set information leaks in.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Custom Naive Bayes Classifier

In [None]:
import numpy as np
class CustomNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. A sample is assigned to the class with the largest log-posterior,
    i.e. ``log(prior) + sum(log N(x_j | mean_j, var_j))``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Portion of the largest overall feature variance that is added to
        every per-class variance. It keeps the variances strictly positive,
        so a feature that is constant within a class does not cause a
        division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels found in ``y``.
    mean : ndarray of shape (n_classes, n_features)
        Per-class feature means.
    var : ndarray of shape (n_classes, n_features)
        Per-class feature variances, including the smoothing term.
    priors : ndarray of shape (n_classes,)
        Relative frequency of each class in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate class priors and per-class feature means/variances.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training samples.
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Raises
        ------
        ValueError
            If ``X`` is not a non-empty 2-D array or ``y`` is not a 1-D
            array with one label per row of ``X``.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] == 0 or X.shape[1] == 0:
            raise ValueError("X must contain at least one sample and one feature")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-D with exactly one label per row of X")

        self.classes = np.unique(y)
        n_classes, n_features = len(self.classes), X.shape[1]
        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0)
            self.priors[idx] = X_c.shape[0] / float(X.shape[0])

        # A feature that is constant within a class has zero variance, which
        # would turn the Gaussian density into 0/0. Add a small data-scaled
        # term, and floor at the smallest positive float for the degenerate
        # case where the smoothing term itself is zero.
        smoothing = self.var_smoothing * X.var(axis=0).max()
        self.var = np.maximum(self.var + smoothing, np.finfo(np.float64).tiny)

    def _calculate_prior(self, class_idx):
        """Return the log prior probability of the class at ``class_idx``."""
        return np.log(self.priors[class_idx])

    def _calculate_likelihood(self, class_idx, x):
        """Return the log-likelihood of sample ``x`` under one class.

        The Gaussian log-density is evaluated directly instead of taking
        ``log`` of the density: for a point far from the class mean the
        density underflows to 0 and its logarithm becomes ``-inf`` for every
        class, which makes the classes indistinguishable.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        log_normaliser = -0.5 * np.log(2.0 * np.pi * var)
        log_kernel = -((x - mean) ** 2) / (2.0 * var)
        return np.sum(log_normaliser + log_kernel)

    def _calculate_posterior(self, x):
        """Return the label of the class with the highest log-posterior for ``x``."""
        posteriors = [
            self._calculate_prior(idx) + self._calculate_likelihood(idx, x)
            for idx in range(len(self.classes))
        ]
        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify; must have the same number of features as
            the data passed to ``fit``.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted label of each sample.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or its feature count differs from training.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[1] != self.mean.shape[1]:
            raise ValueError("X has a different number of features than the training data")
        y_pred = [self._calculate_posterior(x) for x in X]
        return np.array(y_pred)

## Instantiate and train the custom classifier

In [None]:
# Create the classifier, then fit it on the training split
custom_nb_classifier = CustomNaiveBayes()
custom_nb_classifier.fit(X=X_train, y=y_train)

## Predicting the Test set results with Custom Naive Bayes

In [None]:
# Predict a label for every row of the held-out test split
y_pred_custom = custom_nb_classifier.predict(X=X_test)
# Bare expression so the notebook renders the predictions
y_pred_custom

## Comparing actual and predicted values 

In [None]:
# Pair every actual label (left column) with its prediction (right column)
y_compare = np.vstack([y_test, y_pred_custom]).transpose()
# Show only the first five pairs
y_compare[:5]

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Rows correspond to the true classes, columns to the predicted classes
cm_custom = confusion_matrix(y_true=y_test, y_pred=y_pred_custom)
cm_custom

## Finding the accuracy from the confusion matrix

In [None]:
# Correctly classified samples sit on the diagonal of the confusion matrix;
# every off-diagonal entry is a misclassification.
corrPred = np.trace(cm_custom)
falsePred = cm_custom.sum() - corrPred

print(f"Correct predictions: {corrPred}")
print(f"False predictions: {falsePred}")
print(f"\n\nAccuracy of the Custom Naive Bayes Classifier is: {corrPred/cm_custom.sum()}")

## Comparing with GaussianNB

In [None]:
# Fit scikit-learn's Gaussian Naive Bayes classifier on the same training split
from sklearn.naive_bayes import GaussianNB

nvclassifier = GaussianNB()
nvclassifier.fit(X=X_train, y=y_train)

In [None]:
# Label the held-out test samples with the scikit-learn model and print them
y_pred = nvclassifier.predict(X=X_test)
print(y_pred)

In [None]:
# Actual labels in the left column, GaussianNB predictions in the right column
y_compare = np.vstack([y_test, y_pred]).transpose()
# Show only the first five pairs
y_compare[:5]

In [None]:
# Confusion matrix for GaussianNB: true classes along rows, predictions along columns
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

In [None]:
# Accuracy from the confusion matrix: correct predictions sit on the diagonal,
# everything else counts as a false prediction.
corrPred = np.trace(cm)
falsePred = cm.sum() - corrPred

print('Correct predictions: ', corrPred)
print('False predictions', falsePred)
print ('\n\nAccuracy of the Naive Bayes Clasification is: ', corrPred/(cm.sum()))


### Comparison of Custom Naive Bayes Classifier and `GaussianNB`
Both the custom Naive Bayes classifier and the `GaussianNB` classifier from `scikit-learn` showed the same performance on the Iris dataset in terms of accuracy and prediction results. This outcome can be attributed to the following reasons: 

1. **Underlying Algorithm:**
   - Both classifiers are based on the Gaussian Naive Bayes algorithm. They calculate the prior probability and conditional probability assuming a Gaussian (normal) distribution for the features. This results in similar probabilistic computations for classification.

2. **Mathematical Equivalence:**
   - The custom implementation of the Naive Bayes classifier follows the same mathematical principles as `GaussianNB`. The calculation of mean, variance, and likelihood of features given the class, and the computation of posterior probabilities, are fundamentally the same in both implementations.

3. **Feature Scaling:**
   - Feature scaling is applied to both classifiers using `StandardScaler` from `scikit-learn`. This ensures that the features are normalised, resulting in similar input data for both classifiers, which contributes to the same classification results.

4. **Data Splitting:**
   - The same training and test datasets are used for both classifiers. The `train_test_split` function from `scikit-learn` ensures that both classifiers are trained and tested on identical data, leading to comparable performance.

5. **Implementation Accuracy:**
   - The custom Naive Bayes classifier is implemented correctly, adhering to the theoretical principles of Gaussian Naive Bayes. As long as the implementation is correct, the results should match those of the optimised `GaussianNB` provided by `scikit-learn`.

### Conclusion
While the custom Naive Bayes classifier helps in understanding the underlying mechanics of the algorithm, using the `GaussianNB` from `scikit-learn` is generally preferable for practical applications due to its optimisation and robustness. However, the matching performance demonstrates the correctness of the custom implementation and reinforces the theoretical concepts of the Gaussian Naive Bayes classifier.