# Naive Bayes classifier

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import math  # This library is to use normal math functions like exp,sin etc.

In [None]:
# Make Google Drive reachable from this Colab session at /content/drive
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


## Importing data

In [None]:
#below where the file is in gdrive, change with your
data_path = "/content/drive/MyDrive/Colab Notebooks/PRNN_A1/Prnn_datasets/"
dataset = np.loadtxt(data_path + 'PCA_MNIST.csv', delimiter=',',skiprows=1)

In [None]:
dataset.shape

(60000, 11)

The first column holds the class label; the remaining columns are the features.

In [None]:
# Min-max scale every feature column (all columns except column 0, the class
# label) to [0, 1]. Keeping the features in a small range stops the Gaussian
# exponent used by the classifier from collapsing to zero.
# NOTE: the minimum and range are taken over the whole dataset, i.e. before the
# train/test split below, so the held-out rows influence the scaling.
feature_min = dataset[:, 1:].min(axis=0)
feature_range = dataset[:, 1:].max(axis=0) - feature_min
# A constant column has zero range; divide by 1 instead so it maps to 0 rather
# than producing NaN from 0/0.
feature_range[feature_range == 0] = 1.0
dataset[:, 1:] = (dataset[:, 1:] - feature_min) / feature_range

## Splitting into test and train dataset

In [None]:
count = 40000

In [None]:
X_train = dataset[0:count,1:]

In [None]:
Y_train =dataset[0:count,0]

In [None]:
X_test = dataset[count:,1:]

In [None]:
Y_test = dataset[count:,0]

In [None]:
classes = 10 # Here number of classes is 10 change it according to your dataset

In [None]:
features = X_train.shape[1]

## Constructing Naive Bayes Class

In [None]:
class Naive_Bayes:
  """Gaussian naive Bayes classifier without class priors.

  Every class is modelled by one independent normal distribution per feature.
  A sample is assigned to the class whose class-conditional likelihood is
  largest; no prior is applied, so all classes are treated as equally likely.

  Attributes:
    features: Number of features used to size the initial parameters.
    classes: Number of classes; labels are expected to be 0 .. classes - 1.
    mean: List holding one per-feature mean vector per class (initially zeros).
    var: List holding one per-feature *standard deviation* vector per class
      (initially ones). Despite the name, ``train`` stores the square root of
      the variance here and the scoring code divides by it directly.
  """

  # Lower bound applied to the standard deviations while scoring, so that a
  # feature that is constant within a class does not cause a division by zero.
  _STD_FLOOR = 1e-9

  def __init__(self,features=10,classes=10):
    """Start with zero means and unit standard deviations for every class."""
    self.features = features
    self.classes = classes
    self.mean = [np.zeros(features) for _ in range(classes)]
    self.var = [np.ones(features) for _ in range(classes)]

  def _split_by_class(self, X, Y):
    """Return a list whose c-th entry holds the rows of X labelled c."""
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y).ravel()
    return [X[Y == c] for c in range(self.classes)]

  @staticmethod
  def _column_mean(rows):
    """Per-feature mean of ``rows``; all-NaN when the class has no rows."""
    if rows.shape[0] == 0:
      return np.full(rows.shape[1], np.nan)
    return rows.mean(axis=0)

  def _log_scores(self, X):
    """Return the (samples, classes) matrix of log class-conditional scores.

    The score is ``-0.5 * sum(((x - mean) / std) ** 2) - sum(log(std))``, the
    logarithm of the unnormalised Gaussian likelihood. Working with logarithms
    keeps the ranking of the classes while avoiding the underflow of
    ``exp(-k)`` to zero for samples far from every class mean. Classes whose
    parameters are NaN (no training rows) score ``-inf`` and are never chosen.
    """
    X = np.asarray(X, dtype=float)
    scores = np.empty((X.shape[0], self.classes))
    with np.errstate(invalid="ignore"):
      for c in range(self.classes):
        std = np.maximum(self.var[c], self._STD_FLOOR)
        z = (X - self.mean[c]) / std
        scores[:, c] = -0.5 * np.sum(z * z, axis=1) - np.sum(np.log(std))
    return np.where(np.isnan(scores), -np.inf, scores)

  def train_mean(self,X,Y):
    """Estimate only the class means; the stored spreads are left untouched.

    Args:
      X: 2-D array of samples, one row per sample.
      Y: Labels for the rows of X, with values 0 .. classes - 1.
    """
    for c, rows in enumerate(self._split_by_class(X, Y)):
      self.mean[c] = self._column_mean(rows)

  def train(self,X,Y):
    """Estimate the per-class mean and standard deviation of every feature.

    The standard deviation is the population one (divides by the number of
    rows in the class). A class without any training rows gets NaN parameters
    and is therefore never predicted.

    Args:
      X: 2-D array of samples, one row per sample.
      Y: Labels for the rows of X, with values 0 .. classes - 1.
    """
    for c, rows in enumerate(self._split_by_class(X, Y)):
      mu = self._column_mean(rows)
      self.mean[c] = mu
      if rows.shape[0] == 0:
        self.var[c] = np.full(rows.shape[1], np.nan)
      else:
        self.var[c] = np.sqrt(np.sum((rows - mu) ** 2, axis=0) / rows.shape[0])

  def test(self,X,Y):
    """Classify X and compare the result with the labels in Y.

    Args:
      X: 2-D array of samples, one row per sample.
      Y: True labels for the rows of X (truncated to integers).

    Returns:
      A tuple ``(acc, conf_matrix)``: the accuracy in percent and a
      (classes, classes) matrix where entry ``[t][p]`` counts the samples of
      true class ``t`` that were predicted as class ``p``. For an empty X the
      accuracy is 0.0 and the matrix is all zeros.

    Raises:
      ValueError: If Y holds fewer labels than X has rows.
    """
    X = np.asarray(X, dtype=float)
    n = X.shape[0]
    truth = np.asarray(Y).ravel()
    if truth.shape[0] < n:
      raise ValueError("Y must provide a label for every row of X")
    # Use the labels passed in (not a notebook global) for the comparison.
    truth = truth[:n].astype(int)
    predicted = np.argmax(self._log_scores(X), axis=1)

    conf_matrix = np.zeros((self.classes,self.classes))
    # Accumulate every sample; np.add.at handles repeated (true, predicted) pairs.
    np.add.at(conf_matrix, (truth, predicted), 1)
    if n == 0:
      return 0.0, conf_matrix
    acc = int(np.count_nonzero(predicted == truth)) * 100 / n
    return acc,conf_matrix

  def predict(self,X):
    """Return the most likely class index of every row of X as a float array.

    Ties are resolved in favour of the lowest class index.
    """
    return np.argmax(self._log_scores(X), axis=1).astype(float)

## Testing the classifier on the example dataset with both mean and variance estimated

In [None]:
nb = Naive_Bayes()

In [None]:
nb.train(X_train,Y_train)

In [None]:
acc,conf = nb.test(X_test,Y_test)

In [None]:
print("acc is :",acc)

acc is : 88.62


In [None]:
y_pred = nb.predict(X_test)

## Testing the classifier with only the means estimated (unit spread kept)

In [None]:
nb = Naive_Bayes()

In [None]:
nb.train_mean(X_train,Y_train)

In [None]:
acc,conf = nb.test(X_test,Y_test)

In [None]:
print("acc is :",acc)

acc is : 85.84


In [None]:
y_pred = nb.predict(X_test)