**Implement the classifier model with the training dataset without using smoothing and test it on the test dataset.**

In [13]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [14]:
class NB():
    def prior(self, features, target):
        self.prior = (features.groupby(target).apply(lambda x: len(x)) / self.rows).to_numpy()
        return self.prior
    
    def parameters(self, features, target):
        self.mean = features.groupby(target).apply(np.mean).to_numpy()
        self.var = features.groupby(target).apply(np.var).to_numpy()
        return self.mean, self.var
    
    def gaussians(self, class_idx, x):     
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp((-1/2)*((x-mean)**2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        prob = numerator / denominator
        return prob
    
    def calc_posterior(self, x):
        posteriors = []
        for i in range(self.count):
            prior = np.log(self.prior[i])
            conditional = np.sum(np.log(self.gaussians(i, x)))
            posterior = prior + conditional
            posteriors.append(posterior)
        return self.classes[np.argmax(posteriors)]
     
    def fit(self, features, target):
        self.classes = np.unique(target)
        self.count = len(self.classes)
        self.feature_nums = features.shape[1]
        self.rows = features.shape[0]   
        self.parameters(features, target)
        self.prior(features, target)
        
    def predict(self, features):
        preds = [self.calc_posterior(f) for f in features.to_numpy()]
        return preds

    def accuracy(self, y_test, y_pred):
        accuracy = np.sum(y_test == y_pred) / len(y_test)
        return accuracy

In [16]:
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train=x_train.reshape(60000,784)
x_test=x_test.reshape(10000,784)

In [19]:
# x = NB()
# x.fit(x_train, y_train)

**Implement the classifier model with the training dataset using smoothing function and test it on the test dataset.**

In [7]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.metrics import classification_report
from tqdm import tqdm
from time import time
from scipy.stats import multivariate_normal as mvn
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [8]:
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train=x_train.reshape(60000,784)
x_test=x_test.reshape(10000,784)

In [9]:
def NaiveBayes(X_train, y_train, X_test,smoothing):
  curve_data = dict()
  probab_prior= dict()
  training_label = set(y_train)
  #testing_label = set(y_test)
  for c in training_label:
    current_X = X_train[y_train==c]
    curve_data[c] = {
        'mean':current_X.mean(axis=0)+smoothing/2,
        'cov' :current_X.var(axis=0)+smoothing
    }
  
    probab_prior[c] = float(len([y_train==c]))/len(y_train)
    N, D = X_test.shape
    Probabilties = np.zeros((N, len(curve_data))) 
   
    for c, g in curve_data.items():
      mean, cov = g['mean'], g['cov']
      Probabilties[:,c] = mvn.logpdf(X_test, mean=mean, cov=cov) + np.log(probab_prior[c])
    y_hat = np.argmax(Probabilties, axis=1)

  return y_hat

In [4]:
y_pred=NaiveBayes(x_train, y_train, x_test,10)

In [5]:
matrix = confusion_matrix(y_test,y_pred)
print(matrix)

[[ 873    0    8    6    3   12   26    1   47    4]
 [   0 1081    5    5    0    3    7    0   33    1]
 [  16   16  736   41   11    7   74    8  115    8]
 [   5   30   31  775    4   21   22   19   49   54]
 [   6    2   12    3  595   14   28    6   22  294]
 [  20   15    9   86   29  565   24    8   89   47]
 [   8    9   13    2    3   20  884    0   18    1]
 [   0   14   10   11   22    2    3  808   20  138]
 [   8   65   12   31   16   34    8    7  704   89]
 [   4    7    6    8   34    1    0   25   22  902]]


In [6]:
from sklearn.metrics import classification_report
print('\nClassification Report\n')
print(classification_report(y_test, y_pred))


Classification Report

              precision    recall  f1-score   support

           0       0.93      0.89      0.91       980
           1       0.87      0.95      0.91      1135
           2       0.87      0.71      0.79      1032
           3       0.80      0.77      0.78      1010
           4       0.83      0.61      0.70       982
           5       0.83      0.63      0.72       892
           6       0.82      0.92      0.87       958
           7       0.92      0.79      0.85      1028
           8       0.63      0.72      0.67       974
           9       0.59      0.89      0.71      1009

    accuracy                           0.79     10000
   macro avg       0.81      0.79      0.79     10000
weighted avg       0.81      0.79      0.79     10000

