In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from scipy.stats import multivariate_normal as mvn
from scipy.stats import multinomial as mnd
from sklearn.naive_bayes import MultinomialNB
from scipy.stats import bernoulli as ber

%matplotlib inline

In [1]:
class Bayes():
    """Generative Bayes classifier with a selectable class-conditional model.

    The model estimates one likelihood distribution and one prior per class
    and predicts the class with the largest log-posterior
    ``log p(x | k) + log p(k)``.

    Parameters
    ----------
    dist : str
        Which class-conditional distribution to use. One of
        ``'GaussianBayes'``, ``'MultiNomial'`` or ``'Bernoulli'``.

    Attributes (set by ``fit``)
    ---------------------------
    likelihoods : dict
        Maps each class label to its fitted parameters:
        ``{"mean", "cov"}`` for Gaussian, ``{"n", "probability"}`` for
        multinomial, and an array of per-feature success probabilities for
        Bernoulli.
    priors : dict
        Maps each class label to its empirical frequency in the training set.
    K : set
        The distinct (integer-cast) class labels seen during ``fit``.
    """

    _SUPPORTED_DISTS = ('GaussianBayes', 'MultiNomial', 'Bernoulli')

    def __init__(self, dist):
        self.dist = dist

    def _check_dist(self):
        """Raise ``ValueError`` if ``self.dist`` is not a supported model name."""
        if self.dist not in self._SUPPORTED_DISTS:
            raise ValueError(
                "Unknown dist %r; expected one of %r"
                % (self.dist, self._SUPPORTED_DISTS)
            )

    def fit(self, X, y, naive = True):
        """Estimate per-class likelihood parameters and class priors.

        Parameters
        ----------
        X : array_like, shape (N, D)
            Training features (real values for Gaussian, non-negative counts
            for multinomial, 0/1 indicators for Bernoulli).
        y : array_like, shape (N,)
            Class labels; they are cast to ``int`` before grouping.
        naive : bool, default True
            Only used by ``'GaussianBayes'``. When true, features are treated
            as independent and only per-feature variances are stored; when
            false, a full covariance matrix is estimated per class.

        Raises
        ------
        ValueError
            If ``self.dist`` is not one of the supported distributions.
        """
        self._check_dist()

        X = np.asarray(X)
        # Group on the integer-cast labels so the mask and the class set agree.
        labels = np.asarray(y).astype(int)

        self.likelihoods = dict()
        self.priors = dict()
        self.K = set(labels)

        for k in sorted(self.K):
            X_k = X[labels == k, :]
            self.priors[k] = len(X_k) / len(X)

            if self.dist == 'GaussianBayes':
                mu_k = X_k.mean(axis=0)
                if naive:
                    # A 1-D "cov" is interpreted by scipy as a diagonal covariance.
                    cov_k = X_k.var(axis=0)
                else:
                    # bias=True -> divide by N_k, matching the ddof=0 variance
                    # used by the naive branch.
                    cov_k = np.cov(X_k, rowvar=False, bias=True)
                self.likelihoods[k] = {"mean": mu_k, "cov": cov_k}

            elif self.dist == 'MultiNomial':
                feature_totals = X_k.sum(axis=0)
                self.likelihoods[k] = {
                    "n": len(X_k),
                    "probability": feature_totals / feature_totals.sum(),
                }

            else:  # 'Bernoulli'
                self.likelihoods[k] = np.mean(X_k, axis=0)

    def _log_likelihood(self, X, params):
        """Return ``log p(x | class)`` for every row of ``X`` as a length-N array."""
        if self.dist == 'GaussianBayes':
            return mvn.logpdf(X, params["mean"], params["cov"])

        if self.dist == 'MultiNomial':
            # The number of trials of each observation is its own row total;
            # a fixed n would put every other row outside the support (-inf).
            p = np.ravel(params["probability"])
            return mnd.logpmf(X, X.sum(axis=1), p)

        # Bernoulli: features are independent, so the joint log-probability is
        # the sum of the per-feature log-probabilities.
        return ber.logpmf(X, params).sum(axis=1)

    def predict(self, X):
        """Predict the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array_like, shape (N, D)
            Observations to classify; same feature layout as in ``fit``.

        Returns
        -------
        numpy.ndarray, shape (N,)
            The predicted class labels (the labels seen in ``fit``, not
            column positions).

        Raises
        ------
        ValueError
            If ``self.dist`` is not one of the supported distributions.
        """
        self._check_dist()

        X = np.asarray(X)
        N, D = X.shape

        # Columns of P_hat follow the sorted class labels, so labels need not
        # be the contiguous integers 0..K-1.
        class_labels = sorted(self.likelihoods)
        P_hat = np.zeros((N, len(class_labels)))

        for col, k in enumerate(class_labels):
            P_hat[:, col] = (
                self._log_likelihood(X, self.likelihoods[k])
                + np.log(self.priors[k])
            )

        return np.array(class_labels)[P_hat.argmax(axis = 1)]
    
    