In [1]:
import numpy as np
from scipy.stats import norm
class NaiveBayes:
    """Binary Gaussian naive Bayes classifier for labels 0 and 1.

    Each feature is modelled as an independent normal distribution per class.
    Class scores are compared in log space, so the result stays correct even
    when the product of many per-feature densities would underflow to zero.

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest feature variance of the training data that is
        added to every per-class variance during ``fit``. It keeps the
        densities finite when a feature is constant within a class (the same
        idea as ``var_smoothing`` in scikit-learn's ``GaussianNB``). Pass 0 to
        use the raw standard deviations.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        # Class priors; overwritten with floats by fit().
        self.class1_prob = 0.0
        self.class2_prob = 0.0
        # Per-feature means; overwritten with arrays by fit().
        self.mean1 = 0
        self.mean2 = 0
        # Per-feature standard deviations; None marks "not fitted yet".
        self.stddev1 = None
        self.stddev2 = None

    def get_classes(self, X_train, y_train):
        """Split the rows of ``X_train`` into (label == 0, label == 1)."""
        return X_train[y_train == 0], X_train[y_train == 1]

    def get_mean_std(self, class_sample):
        """Return the column-wise mean and population std (ddof=0) of a sample."""
        return np.mean(class_sample, axis=0), np.std(class_sample, axis=0)

    def fit(self, X_train, y_train):
        """Estimate class priors and per-feature Gaussian parameters.

        Parameters
        ----------
        X_train : array-like, shape (n_samples, n_features)
        y_train : array-like, shape (n_samples,), values 0 or 1
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)

        class1, class2 = self.get_classes(X_train, y_train)
        self.class1_prob = len(class1) / X_train.shape[0]
        self.class2_prob = 1 - self.class1_prob

        self.mean1, raw_std1 = self.get_mean_std(class1)
        self.mean2, raw_std2 = self.get_mean_std(class2)

        # A zero std would make the density NaN; add a tiny variance floor
        # that scales with the data.
        epsilon = self.var_smoothing * np.var(X_train, axis=0).max()
        self.stddev1 = np.sqrt(np.square(raw_std1) + epsilon)
        self.stddev2 = np.sqrt(np.square(raw_std2) + epsilon)

    def estimate(self, X_test, mean, stddev):
        """Return the per-sample log-likelihood under independent Gaussians.

        The result is a list with one entry per row of ``X_test``: the sum
        over features of ``log N(x | mean, stddev)``.
        """
        # logpdf evaluates the log density directly, so it stays finite where
        # log(pdf(x)) would already have underflowed to -inf.
        log_density = norm.logpdf(X_test, mean, stddev)
        return list(np.sum(log_density, axis=1))

    def predict(self, X_test):
        """Return a boolean array: True where class 1 is the more likely label.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        """
        if self.stddev1 is None or self.stddev2 is None:
            raise AttributeError("NaiveBayes is not fitted yet; call fit() first")

        # An empty class has prior 0, and log(0) = -inf is the intended score.
        with np.errstate(divide="ignore"):
            log_prior1 = np.log(self.class1_prob)
            log_prior2 = np.log(self.class2_prob)

        # Compare log-posteriors directly instead of exponentiating them:
        # exp() of a large negative sum underflows to 0 for both classes.
        log_post1 = np.asarray(self.estimate(X_test, self.mean1, self.stddev1)) + log_prior1
        log_post2 = np.asarray(self.estimate(X_test, self.mean2, self.stddev2)) + log_prior2
        return log_post2 > log_post1

    def score(self, X_test, y_test):
        """Return the accuracy of ``predict(X_test)`` against ``y_test``."""
        y_pred = self.predict(X_test)
        return float(np.mean(y_pred == np.asarray(y_test)))

In [2]:
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split
# Keep the raw table untouched and derive target/features from it without
# in-place mutation, so later cells can still inspect the original columns.
raw = pd.read_csv('classification.csv')
y = raw['default']                   # target column
data = raw.drop(columns='default')   # remaining columns, target removed
# Only a cell's last expression is rendered, so the preview goes last.
raw.head(2)

In [3]:
# Feature matrix: every column of `data` except 'ed', as a NumPy array.
X = data.drop(columns=['ed']).values

In [4]:
# Fix the shuffle seed so the split, and every score computed from it, is
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
# Instantiate the hand-written NaiveBayes classifier defined in the first cell.
clf = NaiveBayes()

In [6]:
# Estimate the class priors and per-feature mean/std from the training split.
clf.fit(X_train, y_train)

In [7]:
# Boolean prediction per test row: True where the y == 1 class scores higher.
clf.predict(X_test)

array([False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True, False, False, False,  True, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False,  True,  True, False,  True,
       False,  True, False, False, False, False, False, False,  True,
       False, False,  True, False, False, False,  True, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False, False, False,  True, False, False, False,  True,
       False, False, False, False,  True, False, False, False,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [8]:
# Fraction of test rows whose prediction equals the label in y_test.
clf.score(X_test,y_test)

0.7828571428571428

In [9]:
from sklearn.naive_bayes import GaussianNB
# Reference check: fit scikit-learn's GaussianNB on the same split and score
# it on the same test rows.
nb = GaussianNB()
nb.fit(X_train, y_train)
nb.score(X_test, y_test)

0.7828571428571428