<a href="https://colab.research.google.com/github/AakashAhuja30/Machine-Learning/blob/master/ModelsFromScratch/NaiveBayes_FromScratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets

#Assumption: All features are conditionally independent of each other given the class label

In [None]:
class NaiveBayes():
  """Gaussian Naive Bayes classifier.

  Each feature is modelled, per class, as an independent normal
  distribution whose mean and variance are estimated from the training
  data. Prediction picks the class with the highest log-posterior.

  Parameters:
    var_smoothing: fraction of the largest per-feature variance of the
      training data that is added to every per-class variance. This keeps
      the variances strictly positive when a feature is constant within a
      class. Pass 0 to disable smoothing.

  Attributes set by ``fit``:
    classes: sorted array of the distinct labels seen in training.
    mean:    array (n_classes, n_features) of per-class feature means.
    var:     array (n_classes, n_features) of per-class feature variances
             (smoothed).
    priors:  array (n_classes,) of class frequencies in the training set.
  Row ``i`` of ``mean``/``var``/``priors`` belongs to ``classes[i]``.
  """

  def __init__(self, var_smoothing=1e-9):
    self.var_smoothing = var_smoothing

  def fit(self,Xtrain,Ytrain):
    """Estimate per-class means, variances and priors.

    Parameters:
      Xtrain: array-like of shape (n_samples, n_features).
      Ytrain: array-like of n_samples labels (any values np.unique accepts).

    Raises:
      ValueError: if Xtrain is not 2-D, is empty, or its number of rows
        differs from the number of labels.
    """
    Xtrain = np.asarray(Xtrain, dtype=np.float64)
    Ytrain = np.asarray(Ytrain).ravel()
    if Xtrain.ndim != 2:
      raise ValueError("Xtrain must be 2-dimensional (n_samples, n_features)")
    n_samples, n_features = Xtrain.shape
    if n_samples == 0 or n_features == 0:
      raise ValueError("Xtrain must contain at least one sample and one feature")
    if Ytrain.shape[0] != n_samples:
      raise ValueError("Xtrain and Ytrain must have the same number of samples")

    self.classes = np.unique(Ytrain)
    n_classes = len(self.classes)

    self.mean = np.zeros((n_classes,n_features), dtype=np.float64)
    self.var = np.zeros((n_classes,n_features), dtype=np.float64)
    self.priors = np.zeros(n_classes, dtype=np.float64)

    # Variance floor, scaled by the data so it is negligible for ordinary
    # features but prevents a division by zero for constant ones.
    epsilon = self.var_smoothing * Xtrain.var(axis=0).max()

    # Rows are addressed by position in self.classes, not by the label
    # value, so labels need not be the integers 0..n_classes-1.
    for idx, c in enumerate(self.classes):
      X_c = Xtrain[Ytrain == c]
      self.mean[idx,:] = X_c.mean(axis=0)
      self.var[idx,:] = X_c.var(axis=0) + epsilon
      self.priors[idx] = X_c.shape[0] / float(n_samples)

  def predict(self,Xtest):
    """Return a list with the predicted label for every row of Xtest."""
    return [self.predictself(x) for x in Xtest]

  def predictself(self,x):
    """Return the most probable label for a single sample x.

    Parameters:
      x: array-like of n_features values.
    """
    x = np.asarray(x, dtype=np.float64)
    # Log of the Gaussian density evaluated analytically for all classes at
    # once. Taking np.log of prob_density() would underflow to -inf for
    # samples far from every class mean and make all classes tie.
    log_likelihood = -0.5 * np.sum(
        np.log(2 * np.pi * self.var) + (x - self.mean) ** 2 / self.var,
        axis=1)
    # Sum instead of product because everything is in log space.
    log_posterior = np.log(self.priors) + log_likelihood
    return self.classes[np.argmax(log_posterior)]

  def prob_density(self,class_indx,x):
    """Return the per-feature Gaussian density of x under one class.

    Parameters:
      class_indx: position of the class in ``self.classes``.
      x: array-like of n_features values.
    """
    mean = self.mean[class_indx]
    var = self.var[class_indx]
    numerator = np.exp( - (x-mean)**2/(2*var))
    denominator = np.sqrt(2*np.pi*var)
    return numerator / denominator


In [None]:
# Build a synthetic 2-class dataset (1000 samples, 10 features) and hold out
# 20% of it for testing. random_state is fixed in both calls so the data and
# the split are reproducible.
X,y = datasets.make_classification(n_samples=1000,n_features=10,n_classes=2,random_state=123)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state=123)

In [None]:
# Create the classifier; it holds no fitted statistics until fit() is called.
nb = NaiveBayes()

In [None]:
# Estimate per-class means, variances and priors from the training split.
nb.fit(X_train,y_train)

In [None]:
# Predict one label per row of the held-out test split.
predictions = nb.predict(X_test)

In [None]:
def accuracy(y_true, y_pred):
  """Return the fraction of predictions that equal the true labels.

  Parameters:
    y_true: array-like of true labels.
    y_pred: array-like of predicted labels, same length as y_true.

  Raises:
    ValueError: if the two inputs do not contain the same number of labels.
  """
  # Coerce to flat arrays so the comparison is element-wise: two plain lists
  # would compare to a single bool, and an (n,) vs (n,1) pair would silently
  # broadcast to an (n,n) matrix.
  y_true = np.asarray(y_true).ravel()
  y_pred = np.asarray(y_pred).ravel()
  if y_true.shape != y_pred.shape:
    raise ValueError("y_true and y_pred must have the same number of labels")
  acc = np.mean(y_true == y_pred)
  return acc

In [None]:
# Score the predictions against the held-out labels.
accu=accuracy(y_test,predictions)
# Bare expression: its value is what a notebook shows as the cell output.
f'accuracy is {accu*100} %'

'accuracy is 96.5 %'