In [42]:
import pandas as pd
import numpy as np

In [43]:
# Read the headerless CSV and name its five columns: four features and the label.
df = pd.read_csv('banknotes.txt', header=None)
df.columns = ['X_0', 'X_1', 'X_2', 'X_3', 'Y']

# Feature matrix (one row per sample) and label vector as plain NumPy arrays.
X = df.loc[:, ['X_0', 'X_1', 'X_2', 'X_3']].values
y = df.loc[:, 'Y'].values

In [44]:
# Scratch check: display NumPy's value of pi, the constant used by Normal.prob.
np.pi

3.141592653589793

In [45]:
class Normal:
    """Univariate Gaussian density.

    Attributes
    ----------
    mu : mean of the distribution.
    sigma : standard deviation (the density formula squares it to get the variance).
    """

    def __init__(self, mu, sigma):
        self.mu = mu
        self.sigma = sigma

    def prob(self, x):
        """Return the density evaluated at ``x``.

        ``x`` may be a scalar or a NumPy array; the arithmetic is elementwise.
        """
        sigma = self.sigma
        deviation = x - self.mu
        # 1 / sqrt(2 * pi * sigma^2)
        normaliser = 1 / np.sqrt(2 * np.pi * sigma * sigma)
        # -(x - mu)^2 / (2 * sigma^2)
        scaled = -(1 / (2 * sigma * sigma)) * deviation * deviation
        return normaliser * np.exp(scaled)

In [54]:
def get_features(X,y):
    """Estimate per-class Gaussian parameters and class priors.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix. Any number of feature columns is accepted.
    y : array-like, shape (n_samples,)
        Class label of every row of ``X``.

    Returns
    -------
    feature_parameters : dict
        ``{class: {'X_<i>': [mean, std]}}`` where ``<i>`` is the column index
        in ``X`` and ``std`` is NumPy's default (population, ddof=0) standard
        deviation of that column over the rows belonging to the class.
    pCk : dict
        ``{class: fraction of samples carrying that label}``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Name features after their column index instead of assuming exactly four,
    # so the keys stay 'X_0'..'X_3' for four-column input.
    feature_names = ['X_{}'.format(i) for i in range(X.shape[1])]

    feature_parameters = {}
    pCk = {}
    for c in np.unique(y):
        members = y == c
        pCk[c] = members.sum() / len(y)
        x = X[members]
        feature_parameters[c] = {
            name: [x[:, i].mean(), x[:, i].std()]
            for i, name in enumerate(feature_names)
        }

    return feature_parameters, pCk

In [55]:
class p_x_ck:
    """Class-conditional likelihood p(x | C_k) as a product of per-feature Gaussians.

    Parameters
    ----------
    feature_parameter : dict
        ``{class: {feature_name: [mean, std]}}``. The order of the inner dict's
        values must match the order of the entries in the data points passed
        to :meth:`get`, because the two are paired positionally.
    """

    def __init__(self, feature_parameter):
        self.feature_parameter = feature_parameter

    def _get_probs(self, ck):
        """Return one ``Normal`` per feature for class ``ck``."""
        # Read the parameters stored on this instance; relying on a notebook
        # global of a similar name breaks on a fresh kernel and can silently
        # use parameters from a different training run.
        parameters = list(self.feature_parameter[ck].values())
        return [Normal(p[0], p[1]) for p in parameters]

    def get(self, datapoint, ck):
        """Return the product of the per-feature densities of ``datapoint`` under class ``ck``."""
        res = 1
        for dist, value in zip(self._get_probs(ck), datapoint):
            res *= dist.prob(value)
        return res

In [69]:
class NaiveBayes:
    """Gaussian naive Bayes scorer.

    Attributes
    ----------
    feature_parameters : per-class, per-feature parameters returned by ``get_features``.
    pCk : dict mapping each class to its prior, returned by ``get_features``.
    pck : ``p_x_ck`` likelihood object built from ``feature_parameters``.

    All three are ``None`` until :meth:`train` has been called.
    """

    def __init__(self):
        self.feature_parameters = None
        self.pCk = None
        self.pck = None

    def train(self, X,y):
        """Fit the class priors and per-feature parameters on ``X`` and ``y``."""
        self.feature_parameters, self.pCk = get_features(X,y)
        self.pck = p_x_ck(self.feature_parameters)

    def predict(self, datapoint, c):
        """Return the unnormalised posterior score ``p(datapoint | c) * P(c)``.

        Raises
        ------
        RuntimeError
            If called before :meth:`train`.
        """
        if self.pck is None or self.pCk is None:
            raise RuntimeError("NaiveBayes.predict called before train")
        # Use the fitted state on this instance rather than notebook globals
        # named pck / pCk, which do not exist on a fresh kernel.
        return self.pck.get(datapoint,c)*self.pCk[c]
    

In [70]:
# Build the classifier and fit it on the full data set (X, y) loaded above.
nb = NaiveBayes()
nb.train(X,y)


In [74]:
# For every sample, score both classes and keep the index of the larger score;
# the index doubles as the predicted label because the classes are 0 and 1.
y_pred = []
for x in X:
    y_pred.append(np.argmax([nb.predict(x, label) for label in (0, 1)]))

In [77]:
# Accuracy on the data the model was fitted on: share of predictions equal to y.
np.sum(np.array(y_pred) == y) / len(y)

0.84110787172011658