# Training a Classifier for Continuous Features

In [1]:
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB

In [2]:
# Fetch the iris feature matrix and class labels in one call instead of
# building the dataset object twice just to read two attributes.
X, y = load_iris(return_X_y=True)

In [3]:
# Gaussian Naive Bayes estimator created with no arguments (no explicit priors).
gaussiannb = GaussianNB()

In [4]:
# Train on the full iris data; the value returned by fit() is kept under its
# own name and used by the cells that follow.
gaussiannbft = gaussiannb.fit(X, y)

In [7]:
# Predict the class of the first sample; slicing keeps it as a single 2-D row.
gaussiannbft.predict(X[:1])

array([0])

In [8]:
# No `priors` were passed to GaussianNB above, so class_prior_ holds the class
# frequencies estimated from y during fit (per the scikit-learn docs). They
# come out equal here because the three iris classes are equally represented,
# not because equal priors are a built-in default.
gaussiannbft.class_prior_

array([0.33333333, 0.33333333, 0.33333333])

In [9]:
# Raw Naive Bayes probabilities for the first sample. Treat them as
# uncalibrated scores: they should be calibrated before being trusted.
gaussiannbft.predict_proba(X[:1])

array([[1.00000000e+00, 1.35784265e-18, 7.11283512e-26]])

In [58]:
# Second estimator with explicit class priors (one value per class) in place
# of priors derived from the training data.
gnb = GaussianNB(priors=[0.25, 0.25, 0.5])

In [12]:
# Fit the custom-prior model on the same iris data.
ftgnb = gnb.fit(X, y)

In [13]:
# Predict the class of sample 100, passed as a single 2-D row.
ftgnb.predict(X[100:101])

array([2])

# Training a Classifier for Discrete and Count Features

In [24]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np

In [25]:
# Three tiny documents used as the text corpus for the count-based model.
text_data = np.array([
    'I love Brazil. Brazil!',
    'Brazil is best',
    'Germany beats both',
])

In [51]:
# Build the bag-of-words vectorizer (English stop words removed), learn the
# vocabulary from the corpus, and convert the resulting counts to a dense array.
model = CountVectorizer(stop_words='english')
counts_sparse = model.fit_transform(text_data)
data = counts_sparse.toarray()
data

array([[0, 0, 2, 0, 1],
       [0, 1, 1, 0, 0],
       [1, 0, 0, 1, 0]])

In [60]:
# One class label per document in text_data: the first two documents are
# class 0, the third is class 1.
target = [0, 0, 1]

In [52]:
# Map each row of the count matrix back to the vocabulary terms it contains.
model.inverse_transform(data)

[array(['brazil', 'love'], dtype='<U7'),
 array(['best', 'brazil'], dtype='<U7'),
 array(['beats', 'germany'], dtype='<U7')]

In [53]:
# List the learned vocabulary in column order.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement. It returns an ndarray, so
# .tolist() keeps the plain Python list display.
model.get_feature_names_out().tolist()

['beats', 'best', 'brazil', 'germany', 'love']

In [71]:
# class_prior supplies the class priors explicitly, one value per class.
# NOTE(review): 0.25 + 0.1 = 0.35, so these values do not sum to 1 -- confirm
# the intended priors.
# fit_prior only matters when class_prior is not given: per the scikit-learn
# docs, fit_prior=False then selects a uniform prior instead of priors learned
# from the data.
mnb = MultinomialNB(class_prior=[0.25,0.1], alpha=0.5)

In [72]:
# Fit the multinomial model on the word-count matrix and its labels.
ftmnb = mnb.fit(data, target)

In [73]:
# Classify a new count vector with one occurrence each in columns 2 and 4
# of the vocabulary.
ftmnb.predict([[0, 0, 1, 0, 1]])

array([0])

In [74]:
# Constructor argument as stored on the estimator: the same list that was
# passed to MultinomialNB(class_prior=...).
ftmnb.class_prior

[0.25, 0.1]

In [75]:
# fit_prior was not passed to the constructor, so this shows its default value.
ftmnb.fit_prior

True

In [76]:
# Class labels known to the fitted model (the distinct values in target).
ftmnb.classes_

array([0, 1])

In [77]:
# The alpha value that was passed to the MultinomialNB constructor.
ftmnb.alpha

0.5

# Training a Naive Bayes Classifier for Binary Features

In [79]:
from sklearn.naive_bayes import BernoulliNB

In [86]:
# Synthetic binary dataset: 100 samples with 3 binary features each, plus one
# binary label per sample. A seeded generator is used so that re-running the
# notebook reproduces the same data (and therefore the same fitted model);
# the unseeded version produced different data on every run.
rng = np.random.RandomState(0)
data = rng.randint(2, size=(100, 3))
target = rng.randint(2, size=100)  # already 1-D, no ravel() needed

In [91]:
# Bernoulli Naive Bayes for binary features: no explicit class priors and
# fit_prior disabled, with alpha set to 0.5.
bnb = BernoulliNB(alpha=0.5, fit_prior=False, class_prior=None)

In [90]:
# Fit the Bernoulli model on the binary feature matrix and labels.
ftbnb = bnb.fit(data, target)

In [92]:
# binarize was not passed to BernoulliNB above, so this shows its default value.
ftbnb.binarize

0.0

In [94]:
# Predict the label of the second sample, passed as a single 2-D row.
ftbnb.predict(data[1:2])

array([1])

# Calibrating Predicted Probabilities

In [98]:
from sklearn.datasets import load_iris
from sklearn.calibration import CalibratedClassifierCV

In [97]:
# Fetch the iris feature matrix and class labels in one call instead of
# building the dataset object twice just to read two attributes.
Xi, yi = load_iris(return_X_y=True)

In [99]:
# Gaussian Naive Bayes estimator with default arguments; it is handed to the
# calibration wrapper in the next cell.
classifier = GaussianNB()

In [100]:
# Wrap the Gaussian NB estimator in a calibrator that uses 10-fold
# cross-validation and the sigmoid calibration method.
calibrated = CalibratedClassifierCV(
    classifier,
    method='sigmoid',
    cv=10,
)

In [101]:
# Fit the calibrated classifier on the iris data.
ft = calibrated.fit(Xi, yi)

In [104]:
# Calibrated class probabilities for one hand-written sample (four feature values).
ft.predict_proba([[2.6, 2.6, 2.6, 0.4]])

array([[0.08280475, 0.82273828, 0.09445697]])

In [105]:
# Uncalibrated baseline for comparison: fit the plain GaussianNB on the same
# data and query the same sample as the calibrated model above.
classifier.fit(Xi, yi).predict_proba([[2.6, 2.6, 2.6, 0.4]])

array([[2.31548432e-04, 9.99768128e-01, 3.23532277e-07]])