# Naive Bayesian models

### Types:
1. Multinomial (for discrete count features such as word counts; fractional values such as frequencies also work in practice)
2. Bernoulli (for binary features)
3. Gaussian (for normally distributed features)

### Use cases:
1. Spam detection
2. Credit risk prediction
3. Customer classification
4. Health risk prediction

### Assumptions:
1. Predictors are independent of each other (conditionally, given the class)
2. Past conditions still hold true (can lead to false predictions if the circumstances have changed)

## Setup

In [2]:
import numpy as np
import pandas as pd
import urllib
import sklearn

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [3]:
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB

## Using Naive Bayes to predict spam

In [5]:
import urllib.request

# Location of the comma-separated spambase data file.
# NOTE(review): this is a plain-HTTP endpoint; switch to HTTPS if the host serves it.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data'

# Use the response as a context manager so the connection is closed as soon
# as the payload has been parsed, even if parsing raises.
with urllib.request.urlopen(url) as raw_data:
    dataset = np.loadtxt(raw_data, delimiter=',')

# Show the first record as a quick sanity check of the parsed array.
print(dataset[0])

[  0.      0.64    0.64    0.      0.32    0.      0.      0.      0.
   0.      0.      0.64    0.      0.      0.      0.32    0.      1.29
   1.93    0.      0.96    0.      0.      0.      0.      0.      0.
   0.      0.      0.      0.      0.      0.      0.      0.      0.
   0.      0.      0.      0.      0.      0.      0.      0.      0.
   0.      0.      0.      0.      0.      0.      0.778   0.      0.
   3.756  61.    278.      1.   ]


In [13]:
# Target: the final column of every row.
y = dataset[:, -1]
# Features: only the first 48 columns are used; the remaining columns before
# the target are left out.
# NOTE(review): presumably these 48 are the per-word frequency attributes -
# confirm against the dataset documentation.
X = dataset[:, 0:48]

In [16]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=17,
)

In [17]:
# BernoulliNB models binary features, so the inputs are thresholded first.
# `binarize` is a numeric threshold (not a boolean switch): feature values
# above 1.0 become 1, everything else becomes 0.
bernouli = BernoulliNB(binarize=1.0)
bernouli.fit(X_train, y_train)
print(bernouli)

# Evaluate on the held-out rows.
y_pred = bernouli.predict(X_test)
print(accuracy_score(y_test, y_pred))

BernoulliNB(binarize=True)
0.8577633007600435


In [18]:
# Train a multinomial Naive Bayes model on the same split; fit() hands back
# the fitted estimator, so construction and training can be chained.
multinomial = MultinomialNB().fit(X_train, y_train)
print(multinomial)

# Fraction of held-out rows whose predicted label matches the true one.
y_pred = multinomial.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

MultinomialNB()
0.8816503800217155


In [19]:
# Gaussian Naive Bayes on the same training rows, for comparison with the
# Bernoulli and multinomial variants.
gaussian = GaussianNB().fit(X_train, y_train)
print(gaussian)

# Predict the held-out rows and report plain accuracy.
y_pred = gaussian.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

GaussianNB()
0.8197611292073833
