## Naive Bayes Classifiers

In [10]:
import numpy as np
import pandas as pd
import urllib.request

import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB

In [11]:
# The Spambase dataset is hosted by the UCI (University of California, Irvine)
# Machine Learning Repository.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"

# Open the HTTP response in a context manager so the connection is released as
# soon as the file has been parsed, even if parsing raises.
with urllib.request.urlopen(url) as raw_data:
    # Each line of the file is one comma-separated numeric record.
    dataset = np.loadtxt(raw_data, delimiter=',')

# Show the first record as a quick sanity check of the parsed layout.
print(dataset[0])

[  0.      0.64    0.64    0.      0.32    0.      0.      0.      0.
   0.      0.      0.64    0.      0.      0.      0.32    0.      1.29
   1.93    0.      0.96    0.      0.      0.      0.      0.      0.
   0.      0.      0.      0.      0.      0.      0.      0.      0.
   0.      0.      0.      0.      0.      0.      0.      0.      0.
   0.      0.      0.      0.      0.      0.      0.778   0.      0.
   3.756  61.    278.      1.   ]


In [12]:
# Features: the first 48 columns of every record (the later feature columns
# are not used by the models below). Target: the final column.
# NOTE(review): presumably the last column is the spam / not-spam label and
# columns 0-47 are the word-frequency attributes -- confirm against the
# Spambase documentation.
x, y = dataset[:, :48], dataset[:, -1]

In [13]:
# Hold out 20% of the records for evaluation. The fixed random_state keeps the
# shuffle, and therefore the split, repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=17,
)

In [14]:
# Bernoulli naive Bayes works on binary features, so the continuous inputs are
# thresholded before fitting. `binarize` is that numeric threshold (values
# above it become 1, the rest 0), not an on/off flag: passing True would simply
# be treated as a threshold of 1.0, so the number is written out explicitly.
BernNB = BernoulliNB(binarize=1.0)
BernNB.fit(X_train, y_train)

# Keep the true held-out labels next to the model's predictions for scoring.
y_expect = y_test
y_pred = BernNB.predict(X_test)

# Fraction of held-out records classified correctly.
print(accuracy_score(y_expect, y_pred))

0.8577633007600435


In [15]:
# Multinomial naive Bayes with default settings, trained on the training split.
# fit() hands back the estimator itself, so construction and fitting chain.
MultiNB = MultinomialNB().fit(X_train, y_train)

# Predict the held-out records; the true labels are kept alongside for scoring.
y_pred = MultiNB.predict(X_test)
y_expect = y_test

# Fraction of held-out records classified correctly.
print(metrics.accuracy_score(y_expect, y_pred))

0.8816503800217155


In [16]:
# Gaussian naive Bayes with default settings, trained on the training split.
# fit() hands back the estimator itself, so construction and fitting chain.
GausNB = GaussianNB().fit(X_train, y_train)

# Predict the held-out records; the true labels are kept alongside for scoring.
y_pred = GausNB.predict(X_test)
y_expect = y_test

# Fraction of held-out records classified correctly.
print(metrics.accuracy_score(y_expect, y_pred))

0.8197611292073833


In [17]:
# Bernoulli naive Bayes again, this time binarizing the inputs at a threshold
# of 0.1. Note that this rebinds BernNB, which an earlier cell also assigns.
BernNB = BernoulliNB(binarize=0.1).fit(X_train, y_train)

# Predict the held-out records; the true labels are kept alongside for scoring.
y_pred = BernNB.predict(X_test)
y_expect = y_test

# Fraction of held-out records classified correctly.
print(metrics.accuracy_score(y_expect, y_pred))

0.9109663409337676
