In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [7]:
def create_data():
    iris = load_iris()
    data = pd.DataFrame(iris.data, columns=[i.split(' (')[0].replace(' ','_') for i in iris.feature_names])
    data['label'] = iris.target
    X = np.array(data.iloc[:,:-1])
    y = np.array(data.iloc[:,-1])
    y = np.array([1 if i==0 else -1 for i in y])
    return X, y

In [8]:
X, y = create_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

In [12]:
X_train[0],y_train[0]

(array([7.6, 3. , 6.6, 2.1]), -1)

In [290]:
import math
from functools import reduce
class NaiveBayes(object):
    def __init__(self):
        self.data = None
    
    def mean(self, X):
        return sum(X) / float(len(X))
    
    def stdev(self, X):
        avg = self.mean(X)
        return np.sqrt( sum([(x-avg)**2 for x in X]) / float(len(X)) )
    
    def separateByClass(self, X, y):
        labels = list(set(y))
        separated = {i:[] for i in labels}
        for label, value in zip(y, X):
            separated[label].append(value)
        return separated
    
    def calculateProbability(self, x, mean, stdev):
        return (1 / (math.sqrt(2*math.pi) * stdev)) * np.exp( -(x-mean)**2 / (2 * stdev**2) )
    
    def calculateClassProbabilities(self, x, mean, stdev):
        return reduce(lambda x,y: x*y, self.calculateProbability(x ,mean, stdev))
    
    def fit(self, X, y):
        self.rdim, self.cdim = X.shape
        self.data = self.separateByClass(X, y)
        
    def predict(self, x):
        if not self.data: raise Exception('the model need train data')
        probability = {label:self.calculateClassProbabilities(x ,self.mean(value),self.stdev(value)) \
                for label,value in self.data.items()}
        return list(probability.keys())[list(probability.values()).index(max(probability.values()))]

    def score(self, X_test, y_test):
        right_cnt = [1 for X, y in zip(X_test, y_test) if y == self.predict(X)]
        return sum(right_cnt)/float(len(X_test))

In [294]:
model = NaiveBayes()
model.fit(X,y)
model.predict(X_test[2])

1

In [292]:
model.score(X_test, y_test)

1.0