In [1]:
from scipy.stats import norm
from numpy import mean
from numpy import std

In [2]:
# example of generating a small classification dataset
from sklearn.datasets import make_blobs

# Build a two-feature, two-cluster classification dataset.
# X receives the samples and y the class label of each sample.
X, y = make_blobs(
    n_samples=100,
    centers=2,
    n_features=2,
    random_state=1,
)

# Report the array shapes, then preview the first five samples and labels.
print(X.shape, y.shape)
print(X[:5], y[:5], sep='\n')

(100, 2) (100,)
[[-0.79415228  2.10495117]
 [-9.15155186 -4.81286449]
 [-3.10367371  3.90202401]
 [-1.42946517  5.16850105]
 [-7.4693868  -4.20198333]]
[0 1 0 0 1]


In [3]:
# Fit a normal distribution to a univariate data sample.
def fit_distribution(data):
    """Estimate a Gaussian for ``data`` and return it.

    The mean and standard deviation are computed with numpy's ``mean`` and
    ``std`` (called with their default arguments), printed on a single line,
    and then used as the location and scale of a ``scipy.stats.norm``
    distribution.

    Parameters:
        data: the sample values for a single variable.

    Returns:
        The ``norm`` distribution object built from the estimated parameters.
    """
    # Parameter estimates: mean first, then standard deviation.
    location, spread = mean(data), std(data)
    print(location, spread)

    # Hand back the distribution parameterised by those estimates.
    return norm(location, spread)

In [4]:
# Generate the 2-D, two-class dataset (same arguments and seed as earlier).
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# Split the samples by class label.
Xy0, Xy1 = X[y == 0], X[y == 1]
print(Xy0.shape, Xy1.shape)

# Priors: the fraction of all samples that fall in each class.
priory0, priory1 = len(Xy0) / len(X), len(Xy1) / len(X)
print(priory0, priory1)

# One fitted distribution per feature for class 0 ...
X1y0, X2y0 = fit_distribution(Xy0[:, 0]), fit_distribution(Xy0[:, 1])

# ... and one per feature for class 1.
X1y1, X2y1 = fit_distribution(Xy1[:, 0]), fit_distribution(Xy1[:, 1])

(50, 2) (50, 2)
0.5 0.5
-1.5632888906409914 0.787444265443213
4.426680361487157 0.958296071258367
-9.681177100524485 0.8943078901048118
-3.9713794295185845 0.9308177595208521


Example of preparing and making a prediction with a naive Bayes model

In [5]:
from sklearn.datasets import make_blobs
from scipy.stats import norm
from numpy import mean
from numpy import std

In [6]:
# Fit a normal distribution to a univariate data sample.
def fit_distribution(data):
    """Estimate a Gaussian for ``data`` and return it.

    The mean and standard deviation are computed with numpy's ``mean`` and
    ``std`` (called with their default arguments), printed on a single line,
    and then used as the location and scale of a ``scipy.stats.norm``
    distribution.

    Parameters:
        data: the sample values for a single variable.

    Returns:
        The ``norm`` distribution object built from the estimated parameters.
    """
    # Parameter estimates: mean first, then standard deviation.
    location, spread = mean(data), std(data)
    print(location, spread)

    # Hand back the distribution parameterised by those estimates.
    return norm(location, spread)

In [7]:
# Score a sample for one class under the feature-independence assumption.
def probabilty(X, prior, dist1, dist2):
    """Return ``prior`` times the two per-feature densities of ``X``.

    Parameters:
        X: indexable sample; ``X[0]`` and ``X[1]`` are the two feature values.
        prior: prior weight of the class being scored.
        dist1: object whose ``pdf`` is evaluated at ``X[0]``.
        dist2: object whose ``pdf`` is evaluated at ``X[1]``.

    Returns:
        The product ``prior * dist1.pdf(X[0]) * dist2.pdf(X[1])``. The value
        is not normalised across classes.
    """
    density_first = dist1.pdf(X[0])
    density_second = dist2.pdf(X[1])
    return prior * density_first * density_second

In [8]:
# Generate the 2-D, two-class dataset.
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# Split the samples by class label.
Xy0, Xy1 = X[y == 0], X[y == 1]
print(Xy0.shape, Xy1.shape)

# Priors: the fraction of all samples that fall in each class.
priory0, priory1 = len(Xy0) / len(X), len(Xy1) / len(X)
print(priory0, priory1)

# Per-feature distributions conditioned on y == 0.
distX1y0, distX2y0 = fit_distribution(Xy0[:, 0]), fit_distribution(Xy0[:, 1])

# Per-feature distributions conditioned on y == 1.
distX1y1, distX2y1 = fit_distribution(Xy1[:, 0]), fit_distribution(Xy1[:, 1])

(50, 2) (50, 2)
0.5 0.5
-1.5632888906409914 0.787444265443213
4.426680361487157 0.958296071258367
-9.681177100524485 0.8943078901048118
-3.9713794295185845 0.9308177595208521


In [9]:
# Score the first sample of the dataset against both classes.
Xsample = X[0]
ysample = y[0]

# Each score is prior * pdf(feature 1) * pdf(feature 2) for that class.
py0 = probabilty(Xsample, priory0, distX1y0, distX2y0)
py1 = probabilty(Xsample, priory1, distX1y1, distX2y1)

# The printed values are the raw scores multiplied by 100; they are not
# normalised across the two classes.
# NOTE(review): the label text has an unbalanced parenthesis ("P(y=0)| ...)");
# it is kept verbatim so the output matches what was recorded.
print('P(y=0)| %s) = %.3f' % (Xsample, py0*100))
print('P(y=1)| %s) = %.3f' % (Xsample, py1*100))
print('Truth: y = %d' % ysample)

P(y=0)| [-0.79415228  2.10495117]) = 0.348
P(y=1)| [-0.79415228  2.10495117]) = 0.000
Truth: y = 0


Gaussian Naive Bayes

In [10]:
#Example of Gaussian naive Bayes
from sklearn.datasets import make_blobs
from sklearn.naive_bayes import GaussianNB

In [11]:
# Generate the 2-D, two-class dataset.
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# Define the model.
model = GaussianNB()

# Fit the model on the whole dataset.
model.fit(X, y)

# Select a single sample. It is wrapped in a list so the estimator receives
# a 2-D input (one row per sample).
Xsample, ysample = [X[0]], y[0]

# Make a probabilistic prediction for the sample.
yhat_prob = model.predict_proba(Xsample)
print('Predicted probablities: ', yhat_prob)

# Make a class prediction for the same sample. predict() must be given the
# sample's features, not the output of predict_proba(): passing the
# probabilities only ran because the dataset happens to have as many
# features as classes, and it classified an unrelated point.
yhat_class = model.predict(Xsample)
print('Predicted Class: ', yhat_class)
print('Truth: y = %d' % ysample)

Predicted probablities:  [[1.00000000e+00 5.52387327e-30]]
Predicted Class:  [0]
Truth: y = 0
