In [160]:
class Bayes:
    """Gaussian naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per class.
    After ``fit`` the instance exposes:

    * ``classes_`` - sorted array of the distinct labels seen in ``y``
    * ``c``        - number of classes
    * ``avgs``     - list with one per-feature mean vector per class
    * ``vars``     - list with one per-feature variance vector per class
    * ``percs``    - list with the fraction of samples belonging to each class

    The lists are ordered like ``classes_``.
    """

    # Lower bound applied to variances while scoring, so a feature that is
    # constant within a class does not trigger a 0/0 division in predict().
    VAR_FLOOR = 1e-12

    @staticmethod
    def mean(X):
        """Return the mean of ``X`` along axis 0 (one value per column)."""
        return np.mean(X, axis=0)

    @staticmethod
    def variance(X):
        """Return the population variance of ``X`` along axis 0."""
        return np.mean((X - Bayes.mean(X)) ** 2, axis=0)

    def gaussian(self, x, avg, var):
        """Evaluate the normal density with mean ``avg`` and variance ``var`` at ``x``."""
        return (1. / np.sqrt(2 * np.pi * var)) * np.exp(-0.5 * ((x - avg) ** 2) / var)

    def fit(self, X, y):
        """Estimate per-class means, variances and class frequencies.

        Parameters
        ----------
        X : array-like, one row per sample
        y : array-like of labels, one per row of ``X``; any set of distinct
            values is accepted (they need not be ``0..c-1``)

        Raises
        ------
        ValueError
            If the input is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0 or len(X) != len(y):
            raise ValueError("X and y must be non-empty and have the same length")

        # np.unique yields the sorted distinct labels, so for labels 0..c-1
        # the ordering is the same as iterating range(c).
        self.classes_ = np.unique(y)
        self.c = len(self.classes_)
        Xs = [X[y == label] for label in self.classes_]

        # Per-class mean, variance and share of the training samples
        self.avgs = [Bayes.mean(part) for part in Xs]
        self.vars = [Bayes.variance(part) for part in Xs]
        self.percs = [len(part) / len(y) for part in Xs]

    def _joint_log_likelihood(self, x):
        """Return log(prior) + sum of per-feature log densities for every class.

        The class axis is the last axis of the result.
        """
        scores = []
        for prior, avg, var in zip(self.percs, self.avgs, self.vars):
            var = np.maximum(var, self.VAR_FLOOR)
            log_pdf = -0.5 * (np.log(2 * np.pi * var) + (x - avg) ** 2 / var)
            scores.append(np.log(prior) + np.sum(log_pdf, axis=-1))
        return np.stack(scores, axis=-1)

    def predict(self, x):
        """Return the most probable label for one sample or for each row of ``x``.

        A 1-D ``x`` is treated as a single sample and yields a single label;
        otherwise the last axis holds the features and an array of labels is
        returned. Scoring is done in log space: multiplying raw densities
        underflows to zero once there are many features, which would make
        every class tie.
        """
        x = np.asarray(x)
        best = np.argmax(self._joint_log_likelihood(x), axis=-1)
        return self.classes_[best]

    def score(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        y = np.asarray(y)
        y_pred = self.predict(X)
        return np.sum(y_pred == y) / len(y)

In [161]:
import numpy as np
import math
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [162]:
# Load the iris dataset and hold out a test split; random_state is fixed so
# the same split is produced on every run.
iris = load_iris()
X, y = iris["data"], iris["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=22)
# Report the train and test sizes, one per line.
print(len(X_train), len(X_test), sep="\n")

112
38


In [163]:
# Fit the hand-written classifier and show the learned per-class statistics
# (means first, then variances).
bayes = Bayes()
bayes.fit(X_train, y_train)
print(bayes.avgs, bayes.vars, sep="\n")

[array([4.98461538, 3.42564103, 1.46153846, 0.23846154]), array([5.94324324, 2.76486486, 4.25135135, 1.33243243]), array([6.55555556, 2.95833333, 5.59722222, 2.03888889])]
[array([0.13719921, 0.16857331, 0.03262327, 0.00903353]), array([0.28461651, 0.11903579, 0.24898466, 0.04705625]), array([0.41246914, 0.07854167, 0.30804784, 0.0729321 ])]


In [164]:
# Accuracy of the hand-written classifier on the held-out split.
bayes.score(X=X_test, y=y_test)

0.9736842105263158

In [165]:
# Reference result: scikit-learn's GaussianNB on the same split.
from sklearn.naive_bayes import GaussianNB

# fit() returns the estimator itself, so construction and training can be chained.
clf = GaussianNB().fit(X_train, y_train)
clf.score(X_test, y_test)

0.9736842105263158