In [14]:
# NAIVE BAYES

import numpy as np

from sklearn import datasets
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.calibration import CalibratedClassifierCV


In [3]:
# GAUSSIAN NAIVE BAYES ON CONTINUOUS FEATURES

# load the iris dataset and split it into a feature matrix and a label vector
iris = datasets.load_iris()
features, target = iris.data, iris.target

# build the Gaussian naive Bayes estimator, then fit it on the full dataset
classifier = GaussianNB()
model = classifier.fit(features, target)

# a single unseen sample: one row holding four feature values
new_observation = [[4, 4, 4, 0.4]]

# class label the fitted model assigns to that sample
model.predict(new_observation)

array([1])

In [4]:
# explicit prior probability for each of the three classes (sums to 1)
class_priors = [0.25, 0.25, 0.5]

# Gaussian naive Bayes that uses the supplied priors
clf = GaussianNB(priors=class_priors)

# fit on the features and target defined earlier in the notebook
model = clf.fit(features, target)

In [11]:
# TRAINING A CLASSIFIER FOR DISCRETE AND COUNT FEATURES

# three short documents to classify
text_data = np.array(['I love Brazil. Brazil!',
                      'Brazil is best',
                      'Germany beats both'])

# bag of words: fit the vocabulary and count the terms in each document
count = CountVectorizer()
bag_of_words = count.fit_transform(text_data)

# dense feature matrix (one row per document, one column per term)
features = bag_of_words.toarray()

# class label of each document
target = np.array([0, 0, 1])

# Multinomial naive Bayes with fixed class priors.
# Priors are probabilities and must sum to 1; these weight class 1 twice
# as heavily as class 0.
# NOTE(review): MultinomialNB appears to check only the length of
# class_prior, not its sum, so a non-normalized list would be accepted
# silently -- confirm against the installed scikit-learn version.
classifier = MultinomialNB(class_prior=[1 / 3, 2 / 3])
# train model
model = classifier.fit(features, target)

# new observation: one count per vocabulary term, so its length must match
# the number of columns in `features`
new_observation = [[0, 0, 0, 1, 0, 1, 0]]
# predict the class of the new observation
model.predict(new_observation)

array([0])

In [13]:
# TRAINING A NAIVE BAYES CLASSIFIER FOR BINARY FEATURES

# seeded generator so the synthetic data is identical on every run
# (Restart Kernel -> Run All reproduces the same model)
rng = np.random.RandomState(0)

# create three binary features for 100 observations
features = rng.randint(2, size=(100, 3))

# create a binary target vector (1-D, one label per observation)
target = rng.randint(2, size=100)

# Bernoulli naive Bayes with fixed class priors.
# Priors are probabilities and must sum to 1; these weight class 1 twice
# as heavily as class 0.
classifier = BernoulliNB(class_prior=[1 / 3, 2 / 3])

# train model
model = classifier.fit(features, target)

In [15]:
# CALIBRATING PREDICTED PROBABILITIES

# load the iris dataset and split it into a feature matrix and a label vector
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Gaussian naive Bayes: the classifier whose probabilities get calibrated
classifier = GaussianNB()

# wrap it in sigmoid calibration with 2-fold cross-validation
classifier_sigmoid = CalibratedClassifierCV(classifier, method='sigmoid', cv=2)

# fit the calibrated classifier on the data
classifier_sigmoid.fit(features, target)

# a single unseen sample: one row holding four feature values
new_observation = [[2.6, 2.6, 2.6, 0.4]]
# calibrated class probabilities for that sample
classifier_sigmoid.predict_proba(new_observation)

array([[0.31859969, 0.63663466, 0.04476565]])