In [1]:
import math
import numpy as np

In [None]:
class GaussianNB:
    '''
    Implementation of Gaussian Naive Bayes Classifier
    
    Assumptions
    -> Features are conditionally independent of one another given the class
    -> Within each class, the values of every feature are normally distributed.
    
    INPUT:
    var_smoothing -> Float:
        Fraction of the largest per-feature variance that is added to the variances for numerical stability
    '''
    def __init__(self, var_smoothing=True):
        self.var_smoothing = var_smoothing
        
    def fit(self, X, y):
        '''
        Fit the Gaussian Naive Bayes according to X, y
        
        INPUT
        1. X: List, size => [n_samples, n_features]
            Training Vectors
        
        2. y: array-like, [n_samples,]
            Target Values
        '''
        self.X, self.y = X, y
        self.classes = np.unique(y)
        self.parameters = []
        
        """
        If the ratio of data variance b/w dimensions is too small, it will cause numerical errors. 
        
        To address this, we artificially boost the variance by epsilon, a small of the standard deviation
        of the largest dimension. 
        """
        self.epsilon = self.var_smoothing * np.var(X, axis=0).max()
        
        # Calculate the mean and variance of each feature for each class
        for i, c in enumerate(self.classes):
            # Select only the rows where the label equals the given class 
            X_class = X[np.where(y == c)]
            self.parameters.append([])
            
            for col in X_class.T:
                parameters = {'mean': col.mean(), 'var': col.var()}
                self.parameters[i].append(parameters)
    
    def 