## Gaussian Naive Bayes

In [1]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import pandas as pd

# Load the 2-D iris subset from the working directory.
iris = pd.read_csv('iris_2D.csv')

# Separate the table into the dependent variable (last column, the label)
# and the independent variables (every column before it).
y_train = iris.iloc[:, -1].values
X_train = iris.iloc[:, :-1].values

In [13]:
# First feature column of the training matrix
X_train[:, 0]

array([4.9, 4.7, 4.6, 5. , 5.4, 4.6, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3])

In [14]:
# Second feature column of the training matrix
X_train[:, 1]

array([3. , 3.2, 3.1, 3.6, 3.9, 3.4, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3])

In [3]:
# Label vector taken from the last column of the iris table (same row order as X_train)
y_train

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], dtype=int64)

In [4]:
from sklearn.naive_bayes import GaussianNB
import numpy as np

# Fit a Gaussian Naive Bayes model on all loaded rows (no hold-out split is made here).
classifier = GaussianNB()
classifier.fit(X_train, y_train)
# fit() returns the estimator itself, so `clf` and `classifier` name the same fitted model.
clf = classifier

In [7]:
# Predict class-membership probabilities for one unseen sample.
# predict_proba expects a 2-D (n_samples, n_features) array, hence the (1, 2) shape.
x_test = np.array([4.1, 2.9]).reshape(1, -1)
clf.predict_proba(x_test)

array([[0.996191, 0.003809]])

In [9]:
# Class prior probabilities held by the fitted model (one entry per class)
clf.class_prior_

array([0.5, 0.5])

In [11]:
# Fitted variance of each feature (columns) within each class (rows)
clf.var_

array([[0.07888889, 0.09555556],
       [0.22805556, 0.11138889]])

In [12]:
# Fitted mean of each feature (columns) within each class (rows)
clf.theta_

array([[4.86666667, 3.36666667],
       [6.21666667, 2.91666667]])

## Compute manually

In [16]:
import math

def gaussian_function(data, mean, var):
    """Evaluate the univariate Gaussian (normal) probability density.

    Computes ``exp(-(data - mean)**2 / (2*var)) / sqrt(2*pi*var)``
    element-wise, with NumPy broadcasting between the three arguments.

    Parameters
    ----------
    data : scalar or array-like
        Point(s) at which the density is evaluated. Plain Python lists and
        tuples are accepted.
    mean : scalar or array-like
        Mean of the distribution; must broadcast against ``data``.
    var : scalar or array-like
        Variance (not standard deviation) of the distribution; must
        broadcast against ``data``. It should be strictly positive: no
        check is performed, and a zero variance divides by zero.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Density value(s) with the broadcast shape of the inputs.
    """
    # Convert up front so plain sequences take part in array arithmetic;
    # otherwise ``list - float`` raises TypeError and ``float * list`` fails.
    data = np.asarray(data)
    mean = np.asarray(mean)
    var = np.asarray(var)

    norm_const = 1.0 / np.sqrt(2 * np.pi * var)
    exponent = -(data - mean) ** 2 / (2 * var)
    return norm_const * np.exp(exponent)

In [20]:
# Class-1 density of the first test feature (4.1). The mean and variance are
# hand-copied from clf.theta_[1, 0] and clf.var_[1, 0] as displayed above.
gaussian_function(np.array([[4.1]]), mean=6.21666667, var=0.22805556)

array([[4.52805727e-05]])

In [21]:
# Class-1 density of the second test feature (2.9). The mean and variance are
# hand-copied from clf.theta_[1, 1] and clf.var_[1, 1] as displayed above.
gaussian_function(np.array([[2.9]]), mean=2.91666667, var=0.11138889)

array([[1.19384409]])

In [24]:
# Sum of two hand-entered values, each weighted by 0.5 (the value of every
# entry of clf.class_prior_ displayed above).
# NOTE(review): 0.01412 and 0.000047 are not produced by any visible cell —
# presumably the class-0 and class-1 likelihoods of x_test; confirm and
# derive them from code instead of typing them in.
0.01412*0.5+0.000047*0.5

0.0070835

In [16]:
# Manual per-class statistics, class 0.
# These six values equal the first six entries of X_train[:, 0] displayed
# earlier, i.e. the rows whose y_train label is 0.
# NOTE(review): the saved output under this cell (mean 1.52, five densities)
# does not correspond to this six-element array — re-run to refresh it.
data_c0 = np.array([4.9, 4.7, 4.6, 5.0, 5.4, 4.6])
mean_c0 = np.mean(data_c0)
var_c0 = np.var(data_c0)  # NumPy default ddof=0, i.e. population variance
print(mean_c0, var_c0, sep='\n')

# Density of every class-0 sample under the Gaussian fitted to class 0
prob_c0 = gaussian_function(data_c0, mean_c0, var_c0)
print(prob_c0)

1.52
0.1416
[1.00761742 0.40804972 0.893626   0.63670363 0.46995266]


In [17]:
# Manual per-class statistics, class 1.
# NOTE(review): this five-value array is not a class slice of either
# X_train column displayed earlier in the notebook — confirm its source.
data_c1 = np.array([3.8, 4.1, 3.9, 4.2, 3.4])
mean_c1 = np.mean(data_c1)
var_c1 = np.var(data_c1)  # NumPy default ddof=0, i.e. population variance
print(mean_c1, var_c1, sep='\n')

# Density of every class-1 sample under the Gaussian fitted to class 1
prob_c1 = gaussian_function(data_c1, mean_c1, var_c1)
print(prob_c1)

3.88
0.07760000000000002
[1.37426395 1.0484362  1.42843305 0.74034542 0.3245289 ]


In [23]:
# Manual Bayes prediction for a single one-feature query point.
# Note: this rebinds x_test, which earlier held the two-feature sample
# passed to clf.predict_proba.
x_test = np.array([[3.0]])

# Class-conditional densities of the query point
dpf_c0_x = gaussian_function(x_test, mean_c0, var_c0)
dpf_c1_x = gaussian_function(x_test, mean_c1, var_c1)
print(dpf_c0_x, dpf_c1_x, sep='\n')

# Weight each density by 0.5 (the value clf.class_prior_ reports for both
# classes) and sum the two terms to get the normalizing constant.
weighted_c0_x = dpf_c0_x * 0.5
weighted_c1_x = dpf_c1_x * 0.5
total = weighted_c0_x + weighted_c1_x
print(total)

# Normalize. From here on dpf_c0_x / dpf_c1_x hold the normalized
# (posterior) values rather than the raw densities.
dpf_c0_x = weighted_c0_x / total
dpf_c1_x = weighted_c1_x / total

print(dpf_c0_x, dpf_c1_x, sep='\n')

[[0.00046381]]
[[0.00974954]]
[[0.00510668]]
[[0.0454125]]
[[0.9545875]]
