# Gaussian Naive Bayes

In [2]:
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
import pandas as pd

### 1. Data Loading

In [3]:
iris = datasets.load_iris()
df_x = pd.DataFrame(iris.data)
df_y = pd.DataFrame(iris.target)

In [4]:
df_x.head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [5]:
df_y.head()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0


### 2. Model fiitting

In [9]:
gnb = GaussianNB()
fitted_model = gnb.fit(iris.data, iris.target)
y_pred = fitted_model.predict(iris.data)

In [14]:
prob = fitted_model.predict_proba(iris.data)[[1, 45, 50, 101]]
pred = fitted_model.predict(iris.data)[[1, 45, 50, 101]]

print(prob)
print('-'*50)
print(pred)

[[1.00000000e+000 1.51480769e-017 2.34820051e-025]
 [1.00000000e+000 1.95284044e-016 1.97347120e-024]
 [3.21380935e-109 8.04037666e-001 1.95962334e-001]
 [1.06947698e-152 2.50121636e-002 9.74987836e-001]]
--------------------------------------------------
[0 0 1 2]


### 3. Confusion matrix

In [16]:
from sklearn.metrics import confusion_matrix

In [17]:
confusion_matrix(iris.target, y_pred)

array([[50,  0,  0],
       [ 0, 47,  3],
       [ 0,  3, 47]], dtype=int64)

### 4. Prior 설정

In [19]:
gnb2 = GaussianNB(priors = [1/100, 1/100, 98/100])
fitted_model2 = gnb2.fit(iris.data, iris.target)
y_pred2 = fitted_model2.predict(iris.data)
confusion_matrix(iris.target, y_pred2)

array([[50,  0,  0],
       [ 0, 33, 17],
       [ 0,  0, 50]], dtype=int64)

In [20]:
gnb2 = GaussianNB(priors = [1/100, 98/100, 1/100])
fitted_model2 = gnb2.fit(iris.data, iris.target)
y_pred2 = fitted_model2.predict(iris.data)
confusion_matrix(iris.target, y_pred2)

array([[50,  0,  0],
       [ 0, 50,  0],
       [ 0, 14, 36]], dtype=int64)

# Multinomal naive bayes

In [21]:
from sklearn.naive_bayes import MultinomialNB
import numpy as np

In [23]:
X = np.random.randint(5, size=(6, 100))
y = np.array(range(1, 7))

In [25]:
X, y

(array([[2, 4, 0, 0, 3, 1, 3, 1, 4, 2, 3, 4, 2, 1, 4, 3, 3, 1, 2, 1, 0, 4,
         0, 4, 2, 3, 1, 4, 2, 0, 3, 0, 4, 1, 1, 2, 2, 2, 2, 2, 2, 3, 1, 3,
         4, 2, 0, 4, 0, 2, 0, 0, 0, 4, 4, 1, 3, 1, 3, 4, 2, 2, 1, 1, 0, 3,
         3, 2, 4, 0, 1, 2, 4, 2, 3, 3, 4, 4, 1, 0, 3, 0, 2, 2, 2, 4, 3, 3,
         1, 3, 4, 2, 0, 1, 4, 1, 2, 2, 0, 0],
        [2, 2, 1, 1, 2, 2, 2, 1, 0, 0, 4, 0, 2, 0, 0, 3, 1, 3, 1, 1, 2, 2,
         0, 4, 4, 4, 2, 3, 3, 2, 3, 0, 0, 3, 2, 3, 3, 2, 3, 2, 1, 3, 2, 4,
         4, 1, 0, 2, 3, 4, 0, 3, 2, 0, 0, 2, 3, 1, 2, 0, 3, 2, 3, 3, 1, 0,
         0, 2, 2, 2, 1, 1, 2, 3, 1, 1, 1, 1, 2, 4, 1, 0, 2, 4, 2, 1, 1, 2,
         1, 3, 1, 1, 4, 4, 2, 0, 2, 3, 1, 2],
        [1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 4, 4, 3, 4, 4, 1, 0, 0, 0, 4, 2,
         3, 1, 0, 1, 0, 0, 4, 4, 0, 2, 1, 3, 1, 2, 0, 1, 1, 2, 0, 1, 4, 3,
         1, 1, 1, 3, 2, 0, 3, 4, 0, 2, 3, 3, 3, 2, 2, 4, 3, 1, 2, 2, 4, 2,
         0, 4, 3, 3, 4, 3, 0, 2, 4, 2, 1, 1, 3, 4, 4, 3, 4, 4, 1, 2, 3, 0,
        

### 1. Multinomial naive bayes 모델 생성

In [28]:
clf = MultinomialNB()
clf.fit(X, y)

In [30]:
clf.predict(X[2:3])

array([3])

In [31]:
clf.predict_proba(X[2:3])

array([[2.59772154e-37, 6.64248997e-33, 1.00000000e+00, 3.81994616e-37,
        4.33225353e-36, 8.27592956e-37]])

### 2. Prior 변경

In [32]:
clf2 = MultinomialNB(class_prior = [0.1,0.5,0.1,0.1,0.1,0.1])
clf2.fit(X, y)

In [33]:
clf2.predict_proba(X[2:3])

array([[2.59772154e-37, 3.32124498e-32, 1.00000000e+00, 3.81994616e-37,
        4.33225353e-36, 8.27592956e-37]])