In [2]:
import numpy as np
import math
from math import *

class gaussNB():
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated in ``fit``.
    Prediction picks the class with the largest log-posterior.

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest feature variance of the training data that is
        added to every per-class variance, so that a feature which is constant
        inside a class does not cause a division by zero.

    Attributes set by ``fit``
    -------------------------
    classes_ : sorted unique labels found in ``y_train``.
    mean_    : (n_classes, n_features) per-class feature means.
    var_     : (n_classes, n_features) per-class feature variances (smoothed).
    priors_  : (n_classes,) fraction of training rows belonging to each class.

    Rows of ``mean_``, ``var_`` and ``priors_`` follow the order of
    ``classes_``; they are addressed by position, never by label value.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X_train, y_train):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,); labels may be of any type
            that ``np.unique`` can sort (ints, strings, ...).
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train).ravel()

        self.X_train = X_train
        self.y_train = y_train

        samples_no, no_features = X_train.shape

        self.classes_ = np.unique(y_train)
        no_classes = len(self.classes_)

        self.mean_ = np.zeros((no_classes, no_features), dtype=float)
        self.var_ = np.zeros((no_classes, no_features), dtype=float)
        # One prior per class, hence a 1-D array.
        self.priors_ = np.zeros(no_classes, dtype=float)

        # Variance floor; fall back to var_smoothing itself when the whole
        # training set is constant, otherwise the floor would be zero.
        max_var = X_train.var(axis=0).max()
        epsilon = self.var_smoothing * (max_var if max_var > 0 else 1.0)

        # Use the position of the class (indx) as the row index: the label
        # itself may be a string or an integer outside 0..no_classes-1.
        for indx, single_class in enumerate(self.classes_):
            X_class = X_train[y_train == single_class]

            self.mean_[indx, :] = X_class.mean(axis=0)
            self.var_[indx, :] = X_class.var(axis=0) + epsilon
            self.priors_[indx] = X_class.shape[0] / float(samples_no)

    def predictSinglePoint(self, x):
        """Return the predicted class label for one sample ``x`` (1-D, n_features)."""
        x = np.asarray(x, dtype=float)

        posteriors = []
        for indx in range(len(self.classes_)):
            prior = np.log(self.priors_[indx])
            # Sum of per-feature log-densities = log of the naive (independent)
            # class-conditional likelihood.
            total = np.sum(self._log_pd_function(indx, x))
            posteriors.append(prior + total)

        return self.classes_[np.argmax(posteriors)]

    def _log_pd_function(self, class_indx, x):
        """Per-feature log of the Gaussian density for class ``class_indx``.

        Computed directly in log-space so that points far from the class mean
        do not underflow to ``log(0) = -inf``.
        """
        mean = self.mean_[class_indx]
        var = self.var_[class_indx]
        return -0.5 * np.log(2.0 * np.pi * var) - ((x - mean) ** 2) / (2.0 * var)

    def pd_function(self, class_indx, x):
        """Per-feature Gaussian probability density of ``x`` for one class.

        Parameters
        ----------
        class_indx : int, row of ``mean_`` / ``var_`` (position in ``classes_``).
        x : array-like of shape (n_features,)

        Returns
        -------
        ndarray of shape (n_features,) with the density of each feature value.
        """
        mean = self.mean_[class_indx]
        var = self.var_[class_indx]
        x = np.asarray(x, dtype=float)

        # var_ already holds sigma squared, so it must not be squared again.
        num = np.exp(-((x - mean) ** 2) / (2.0 * var))
        denum = np.sqrt(2.0 * np.pi * var)

        return num / denum

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``; returns a list of labels."""
        return [self.predictSinglePoint(x) for x in np.asarray(X_test, dtype=float)]

    def acc_score(self, X_test, y_test):
        """Return the fraction of rows in ``X_test`` whose prediction equals ``y_test``."""
        # Convert both sides to arrays: comparing two plain lists with ``==``
        # yields a single bool instead of an element-wise result.
        y_predicted = np.asarray(self.predict(X_test))
        y_true = np.asarray(y_test).ravel()

        return float(np.sum(y_predicted == y_true) / len(y_true))
        

In [3]:
class multiNB():
    """Multinomial Naive Bayes classifier for count features.

    Parameters
    ----------
    alpha : float, default 1
        Additive (Laplace) smoothing constant added to every per-class
        feature count before normalising.

    Attributes set by ``fit``
    -------------------------
    classes_     : sorted unique labels found in ``y_train``.
    priors_      : (n_classes,) fraction of training rows in each class.
    likelihoods_ : (n_classes, n_features) smoothed feature probabilities;
                   every row sums to 1.

    Rows of ``priors_`` and ``likelihoods_`` follow the order of ``classes_``.
    """

    def __init__(self, alpha=1):
        self.alpha = alpha

    def fit(self, X_train, y_train):
        """Estimate class priors and smoothed per-class feature likelihoods.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features), feature counts.
        y_train : array-like of shape (n_samples,), class labels.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train).ravel()

        samples_no, no_features = X_train.shape

        self.classes_ = np.unique(y_train)
        no_classes = len(self.classes_)

        self.priors_ = np.zeros(no_classes)
        self.likelihoods_ = np.zeros((no_classes, no_features))

        # Address rows by the position of the class, not by the label value,
        # so string labels and non-contiguous integers work.
        for indx, single_class in enumerate(self.classes_):
            X_train_clss = X_train[y_train == single_class]

            self.priors_[indx] = X_train_clss.shape[0] / float(samples_no)

            # alpha smooths the table so unseen features keep non-zero mass.
            smoothed_counts = X_train_clss.sum(axis=0) + self.alpha
            self.likelihoods_[indx, :] = smoothed_counts / smoothed_counts.sum()

    def predictSinglePoint(self, x):
        """Return the predicted class label for one count vector ``x``."""
        x = np.asarray(x, dtype=float)

        posteriors = []
        for indx in range(len(self.classes_)):
            prior = np.log(self.priors_[indx])

            # With alpha == 0 a likelihood can be exactly 0. theta**0 is 1, so
            # such a feature contributes 0 when its count is 0, not
            # log(0) * 0 = NaN.
            with np.errstate(divide="ignore", invalid="ignore"):
                log_likelihood = np.log(self.likelihoods_[indx, :])
                likelihood_class = np.where(x != 0, x * log_likelihood, 0.0)

            posteriors.append(prior + np.sum(likelihood_class))

        return self.classes_[np.argmax(posteriors)]

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``; returns a list of labels."""
        return [self.predictSinglePoint(x) for x in np.asarray(X_test, dtype=float)]

    def acc_score(self, X_test, y_test):
        """Return the fraction of rows in ``X_test`` whose prediction equals ``y_test``."""
        # Convert both sides to arrays: comparing two plain lists with ``==``
        # yields a single bool instead of an element-wise result.
        y_predicted = np.asarray(self.predict(X_test))
        y_true = np.asarray(y_test).ravel()

        return float(np.sum(y_predicted == y_true) / len(y_true))