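# Naive Bayes
Gaussian Naive Bayes implemented by hand and then checked against scikit-learn's `GaussianNB`, following the tutorial at:
https://machinelearningmastery.com/classification-as-conditional-probability-and-the-naive-bayes-algorithm/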

In [38]:
# Generate a small, reproducible two-class dataset for the Naive Bayes walkthrough.
import numpy as np
import pandas as pd
from scipy.stats import norm
# `sklearn.datasets.samples_generator` was deprecated in scikit-learn 0.22 and
# removed in 0.24; `make_blobs` is exported from `sklearn.datasets` directly.
from sklearn.datasets import make_blobs

# 100 samples with 2 features drawn around 2 centers (one blob per class).
# The fixed random_state makes the generated data identical on every run.
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# Summarize: array shapes, then the first five feature rows and their labels.
print(X.shape, y.shape)
print(X[:5])
print(y[:5])

(100, 2) (100,)
[[-0.79415228  2.10495117]
 [-9.15155186 -4.81286449]
 [-3.10367371  3.90202401]
 [-1.42946517  5.16850105]
 [-7.4693868  -4.20198333]]
[0 1 0 0 1]


In [41]:
# fit a probability distribution to a univariate data sample
def fit_distribution(data):
    """Fit a univariate Gaussian to `data` and return it as a frozen scipy distribution.

    The location is the sample mean and the scale is `np.std(data)` (NumPy's
    default, i.e. the population standard deviation). Both estimates are
    printed as a side effect so the fitted parameters show up in the cell
    output.

    Parameters
    ----------
    data : array-like
        One-dimensional sample of a single feature.

    Returns
    -------
    scipy.stats frozen `norm` distribution parameterized by the estimates.
    """
    location, spread = np.mean(data), np.std(data)
    print(location, spread)
    return norm(loc=location, scale=spread)

In [68]:
# Partition the feature rows by class label using a boolean mask per label.
# (For a 3-class dataset, add label 2 and a third target name, e.g. Xy2.)
Xy0, Xy1 = (X[y == label] for label in (0, 1))
print(Xy0.shape, Xy1.shape)

(50, 2) (50, 2)


In [69]:
# Show the first five rows of the class-0 subset as a quick sanity check of the split.
Xy0[:5]

array([[-0.79415228,  2.10495117],
       [-3.10367371,  3.90202401],
       [-1.42946517,  5.16850105],
       [-2.76017908,  5.55121358],
       [-1.17104176,  4.33091816]])

In [70]:
# Class priors P(y): the share of all samples that falls into each class subset.
priory0, priory1 = (len(subset) / len(X) for subset in (Xy0, Xy1))
print(priory0, priory1)

0.5 0.5


In [71]:
# Fit one Gaussian per (feature, class) pair. Columns 0 and 1 are fitted in
# order, class 0 before class 1, so the printed parameters keep that order.
# PDFs for y==0
distX1y0, distX2y0 = (fit_distribution(Xy0[:, col]) for col in (0, 1))
# PDFs for y==1
distX1y1, distX2y1 = (fit_distribution(Xy1[:, col]) for col in (0, 1))

-1.5632888906409914 0.787444265443213
4.426680361487157 0.958296071258367
-9.681177100524485 0.8943078901048118
-3.9713794295185845 0.9308177595208521


In [45]:
# calculate the independent conditional probability
def probability(X, prior, dist1, dist2, *more_dists):
    """Return the unnormalized Naive Bayes score of one sample for one class.

    Computes ``prior * dist1.pdf(X[0]) * dist2.pdf(X[1])`` and, when extra
    distributions are passed, keeps multiplying by ``dist.pdf(X[i])`` for
    features 2, 3, ... in order.

    The result is the prior times the product of per-feature densities. It is
    NOT divided by the evidence P(X), so the scores of different classes do
    not sum to 1; compare them across classes and pick the largest.

    Parameters
    ----------
    X : sequence
        Feature values of a single sample, indexable by position.
    prior : float
        Prior probability of the class being scored.
    dist1, dist2 : objects with a ``pdf(value)`` method
        Class-conditional distributions of features 0 and 1.
    *more_dists : objects with a ``pdf(value)`` method, optional
        Class-conditional distributions of features 2, 3, ... Omitting them
        reproduces the original two-feature behavior (any further entries in
        ``X`` are ignored).

    Returns
    -------
    The unnormalized score (a float for scalar feature values).

    Raises
    ------
    IndexError
        If ``X`` has fewer entries than distributions supplied.
    """
    # Keep the original left-to-right multiplication so two-feature results
    # are bit-for-bit identical to the previous implementation.
    score = prior * dist1.pdf(X[0]) * dist2.pdf(X[1])
    for position, dist in enumerate(more_dists, start=2):
        score = score * dist.pdf(X[position])
    return score

In [76]:
# Score the first dataset row under both class models and compare with its label.
Xsample = X[0]
ysample = y[0]

py0, py1 = (
    probability(Xsample, prior, dist_feat1, dist_feat2)
    for prior, dist_feat1, dist_feat2 in (
        (priory0, distX1y0, distX2y0),
        (priory1, distX1y1, distX2y1),
    )
)
# NOTE: the printed numbers are the values returned by probability() scaled
# by 100; only their relative size across the two classes is meaningful here.
print(
    'P(y=0 | %s) = %.3f' % (Xsample, py0*100),
    'P(y=1 | %s) = %.3f' % (Xsample, py1*100),
    'Truth: y=%d' % ysample,
    sep='\n',
)

P(y=0 | [-0.79415228  2.10495117]) = 0.348
P(y=1 | [-0.79415228  2.10495117]) = 0.000
Truth: y=0


In [77]:
# classify second example: a hand-picked point (no ground-truth label exists for it)
Xsample2 = [-0.5, 2.5]

# Manual expansion of the score as a cross-check of probability(). It reads the
# priors and the sample coordinates from the variables instead of repeating the
# literals, so it cannot drift out of sync with Xsample2 or the computed priors.
print(priory0*distX1y0.pdf(Xsample2[0])*distX2y0.pdf(Xsample2[1])*100)
print(priory1*distX1y1.pdf(Xsample2[0])*distX2y1.pdf(Xsample2[1])*100)

py0 = probability(Xsample2, priory0, distX1y0, distX2y0)
py1 = probability(Xsample2, priory1, distX1y1, distX2y1)
print('P(y=0 | %s) = %.3f' % (Xsample2, py0*100))
print('P(y=1 | %s) = %.3f' % (Xsample2, py1*100))

0.5615453178620351
3.964526858439656e-33
P(y=0 | [-0.5, 2.5]) = 0.562
P(y=1 | [-0.5, 2.5]) = 0.000


In [79]:
# classify third example: a hand-picked point (no ground-truth label exists for it)
Xsample3 = [-8.0, -2.0]

# Manual expansion of the score as a cross-check of probability(). It reads the
# priors and the sample coordinates from the variables instead of repeating the
# literals, so it cannot drift out of sync with Xsample3 or the computed priors.
print(priory0*distX1y0.pdf(Xsample3[0])*distX2y0.pdf(Xsample3[1])*100)
print(priory1*distX1y1.pdf(Xsample3[0])*distX2y1.pdf(Xsample3[1])*100)

py0 = probability(Xsample3, priory0, distX1y0, distX2y0)
py1 = probability(Xsample3, priory1, distX1y1, distX2y1)
print('P(y=0 | %s) = %.3f' % (Xsample3, py0*100))
print('P(y=1 | %s) = %.3f' % (Xsample3, py1*100))

5.592883203699416e-24
0.17340028597761695
P(y=0 | [-8.0, -2.0]) = 0.000
P(y=1 | [-8.0, -2.0]) = 0.173


In [91]:
# example of gaussian naive bayes
# `sklearn.datasets.samples_generator` was deprecated in scikit-learn 0.22 and
# removed in 0.24; `make_blobs` is exported from `sklearn.datasets` directly.
from sklearn.datasets import make_blobs
from sklearn.naive_bayes import GaussianNB

# generate 2d classification dataset (same parameters and seed as the manual walkthrough)
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# define the model
model = GaussianNB()

# fit the model
model.fit(X, y)

# select a single sample; it is wrapped in a list so the model receives a
# 2-D input holding one row instead of a bare 1-D feature vector
Xsample, ysample = [X[0]], y[0]

# make a probabilistic prediction
yhat_prob = model.predict_proba(Xsample)
print('Predicted Probabilities: ', yhat_prob)

# make a classification prediction
yhat_class = model.predict(Xsample)
print('Predicted Class: ', yhat_class)
print('Truth: y=%d' % ysample)

Predicted Probabilities:  [[1.00000000e+00 5.52387327e-30]]
Predicted Class:  [0]
Truth: y=0


In [92]:
# Second sample: the hand-picked point [-0.5, 2.5], passed as a one-row 2-D list.
Xsample2 = [[-0.5, 2.5]]
yhat_prob2, yhat_class2 = model.predict_proba(Xsample2), model.predict(Xsample2)
print('Predicted Probabilities: ', yhat_prob2)
print('Predicted Class: ', yhat_class2)

Predicted Probabilities:  [[1.00000000e+00 7.06004193e-33]]
Predicted Class:  [0]


In [93]:
# Third sample: the hand-picked point [-8.0, -2.0], passed as a one-row 2-D list.
Xsample3 = [[-8.0, -2.0]]
yhat_prob3, yhat_class3 = model.predict_proba(Xsample3), model.predict(Xsample3)
print('Predicted Probabilities: ', yhat_prob3)
print('Predicted Class: ', yhat_class3)

Predicted Probabilities:  [[3.22542191e-23 1.00000000e+00]]
Predicted Class:  [1]
