In [1]:
# Chapter 18. Naive Bayes

In [3]:
# 18.1 Training a Classifier for Continuous Features
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

# Load the iris data set and split it into a feature matrix and a target vector
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Instantiate the Gaussian naive Bayes estimator
classifier = GaussianNB()

# Fit on the full data set; the result of fit() is kept as `model`
model = classifier.fit(features, target)

# A single new sample: one row holding four feature values
new_observation = [[4, 4, 4, 0.4]]

# Predict the class label of the new sample
model.predict(new_observation)

array([1])

In [4]:
# Create a Gaussian naive Bayes object with an explicit prior probability
# for each of the three classes (the values sum to 1)
clf = GaussianNB(priors=[0.25, 0.25, 0.5])

# Train the estimator that carries the priors. Fitting `classifier` here
# instead would silently ignore the priors configured on `clf`.
model = clf.fit(features, target)

In [10]:
# 18.2 Training a Classifier for Discrete and Count Features
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer

# Create text: three short documents
text_data = np.array(['I love Brazil. Brazil!',
                      'Brazil is best',
                      'Germany beats both'])

# Create bag of words (sparse document-term count matrix)
count = CountVectorizer()
bag_of_words = count.fit_transform(text_data)

# Create feature matrix as a dense array
features = bag_of_words.toarray()

# Create target vector: one class label per document
target = np.array([0, 0, 1])

# Create multinomial naive Bayes object with prior probabilities of each class.
# Priors must form a probability distribution; the values keep the 1:2 ratio
# between class 0 and class 1 but are normalised so that they sum to 1.
classifier = MultinomialNB(class_prior=[1 / 3, 2 / 3])

# Train model
model = classifier.fit(features, target)

# Create new observation: it must have one entry per column of `features`
new_observation = [[0, 0, 0, 1, 0, 1, 0]]

# Predict class
model.predict(new_observation)

array([0])

In [11]:
# 18.3 Training a Naive Bayes Classifier for Binary Features

import numpy as np
from sklearn.naive_bayes import BernoulliNB

# Use a locally seeded generator so the random data (and therefore the fitted
# model) is identical on every Restart & Run All, without touching the
# global NumPy random state
rng = np.random.default_rng(0)

# Create three binary features for 100 observations (values are 0 or 1)
features = rng.integers(2, size=(100, 3))

# Create a binary target vector with one label per observation
target = rng.integers(2, size=100)

# Create Bernoulli naive Bayes object with prior probabilities of each class.
# Priors must form a probability distribution; the values keep a 1:2 ratio
# between class 0 and class 1 and sum to 1.
classifier = BernoulliNB(class_prior=[1 / 3, 2 / 3])

# Train model
model = classifier.fit(features, target)

In [13]:
# Bernoulli naive Bayes configured with no explicit class priors and with
# prior fitting switched off (this object is not fitted here)
model_uniform_prior = BernoulliNB(fit_prior=False, class_prior=None)

In [None]:
# 18.4 Calibrating Predicted Probabilities
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.calibration import CalibratedClassifierCV

# Load the iris data set and split it into features and target
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Base estimator whose probabilities will be calibrated
classifier = GaussianNB()

# Wrap the base estimator in 2-fold cross-validated sigmoid calibration
classifier_sigmoid = CalibratedClassifierCV(classifier, method='sigmoid', cv=2)

# Fit the calibrated classifier
classifier_sigmoid.fit(features, target)

# A single new sample with four feature values
new_observation = [[2.6, 2.6, 2.6, 0.4]]

# View the calibrated class probabilities for the new sample
classifier_sigmoid.predict_proba(new_observation)

array([[0.31859971, 0.63663451, 0.04476578]])

In [None]:
# Train the plain Gaussian naive Bayes estimator, then view its
# class probabilities for the same observation
classifier.fit(features, target)
classifier.predict_proba(new_observation)

array([[2.31548432e-04, 9.99768128e-01, 3.23532277e-07]])