In [154]:
from collections import Counter

import ipywidgets as widgets
import matplotlib.pyplot as plot
from ipywidgets import interact
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

In [155]:
# Load the bundled iris dataset and preview part of its feature matrix.
iris = datasets.load_iris()

# NOTE(review): the slice starts at row 1, so the very first sample is not
# shown in the preview -- confirm this is intended rather than `[:15]`.
print(iris.data[1:15], "...", sep="\n")

[[4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]]
...


In [156]:
# Fit a Gaussian naive Bayes classifier on all iris features.
gnb = GaussianNB()
gnb.fit(iris.data, iris.target)

# `class_prior_` holds the prior (not posterior) probability of each class,
# so the label says "Априорные".
print("Априорные вероятности =", gnb.class_prior_)
print("Мат ожидания =\n", gnb.theta_)
# scikit-learn 1.0 deprecated `sigma_` in favour of `var_` and 1.2 removed it;
# prefer the new attribute and fall back only on older installations.
print("Дисперсии =\n", gnb.var_ if hasattr(gnb, "var_") else gnb.sigma_)

Апостерирорные вероятности = [0.33333333 0.33333333 0.33333333]
Мат ожидания =
 [[5.006 3.428 1.462 0.246]
 [5.936 2.77  4.26  1.326]
 [6.588 2.974 5.552 2.026]]
Дисперсии =
 [[0.121764 0.140816 0.029556 0.010884]
 [0.261104 0.0965   0.2164   0.038324]
 [0.396256 0.101924 0.298496 0.073924]]


In [157]:
def show_feature(y, feature):
    """Plot one feature's values for one iris class together with the fitted mean.

    Every distinct feature value is drawn as a blue dot at height 0 whose
    marker size is twice the number of samples sharing that value. The mean
    fitted by the module-level ``gnb`` for this class/feature is drawn as a
    red dot, and the fitted mean and variance are printed after the figure.

    Args:
        y: Class label; selects rows of ``iris.data`` whose target equals
            ``y`` and the matching row of the fitted parameters.
        feature: Column index of the feature in ``iris.data``.
    """
    # Vectorised boolean mask instead of mapping a lambda over the labels.
    feature_by_y = iris.data[iris.target == y, feature]

    # How many samples share each distinct value.
    value_counts = Counter(feature_by_y)

    # Note: this changes the default figure size globally, not just here.
    plot.rcParams['figure.figsize'] = (15, 2)
    plot.figure()
    for value, count in value_counts.items():
        plot.plot(value, 0, 'bo', markersize=count * 2)

    class_mean = gnb.theta_[y, feature]
    plot.plot(class_mean, 0, 'ro', markersize=8)

    plot.show()

    # scikit-learn 1.0 deprecated `sigma_` in favour of `var_` and 1.2 removed
    # it; prefer the new attribute and fall back only on older installations.
    variances = gnb.var_ if hasattr(gnb, "var_") else gnb.sigma_

    print("Мат. ожидание =", str(class_mean))
    print("Дисперсия =", str(variances[y, feature]))
    
# Bind show_feature to two integer sliders: `y` spans 0..2 and `feature`
# spans 0..3, both starting at 0.
interact(
    show_feature,
    y=widgets.IntSlider(value=0, min=0, max=2),
    feature=widgets.IntSlider(value=0, min=0, max=3),
);

interactive(children=(IntSlider(value=0, description='y', max=2), IntSlider(value=0, description='feature', ma…

In [158]:
# Predict on the same samples the model was fitted on, so the number below
# is a training-set error count.
y_predict = gnb.predict(iris.data)

print(
    "Количество неправильно распознанных ирисов из %d = %d"
    % (len(iris.data), (y_predict != iris.target).sum())
)

Количество неправильно распознанных ирисов из 150 = 6


In [160]:
def show_2_features(feat1, feat2):
    """Fit Gaussian naive Bayes on two iris features and plot the outcome.

    A fresh ``GaussianNB`` is fitted on the two selected columns of
    ``iris.data`` and used to predict those same samples. The number of
    misclassified samples is printed, then a scatter plot is drawn with one
    colour per true class and a white star over every misclassified sample.

    Args:
        feat1: Column index of the feature shown on the x axis.
        feat2: Column index of the feature shown on the y axis.
    """
    data = iris.data[:, [feat1, feat2]]

    gnb2 = GaussianNB()
    gnb2.fit(data, iris.target)

    y_predict2 = gnb2.predict(data)

    # Computed once and reused for both the count and the plot overlay.
    is_error = iris.target != y_predict2

    print("Количество неправильно распознанных ирисов из %d = %d"
          % (data.shape[0], is_error.sum()))

    # Note: this changes the default figure size globally, not just here.
    plot.rcParams['figure.figsize'] = (15, 10)
    plot.figure()

    colors = ['r', 'b', 'g']
    for y in range(0, 3):
        # Vectorised boolean mask instead of mapping a lambda over the labels.
        data_y = data[iris.target == y]
        plot.plot(data_y[:, 0], data_y[:, 1], 'o', color=colors[y],
                  markersize=10, label=iris.target_names[y])

    errors = data[is_error]
    plot.plot(errors[:, 0], errors[:, 1], 'w*', markersize=10)

    # Label the axes so the figure shows which features the sliders selected.
    plot.xlabel(iris.feature_names[feat1])
    plot.ylabel(iris.feature_names[feat2])
    plot.legend()

    plot.show()
    
# Bind show_2_features to two integer sliders spanning feature columns 0..3;
# `feat1` starts at 0 and `feat2` at 1.
interact(
    show_2_features,
    feat1=widgets.IntSlider(value=0, min=0, max=3),
    feat2=widgets.IntSlider(value=1, min=0, max=3),
);

interactive(children=(IntSlider(value=0, description='feat1', max=3), IntSlider(value=1, description='feat2', …