In [1]:
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

**Data manipulation**

In [2]:
iris = datasets.load_iris()
df = pd.DataFrame(iris.data)
df.columns = iris.feature_names
df['class'] = iris.target
iris.target_names
iris.data[:,0][[1,2]]
iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


**Splitting dataset on train\test**

In [3]:
def split(df, ratio = 0.77):
    
    msk = np.random.rand(len(df)) < ratio
    train = df[msk]
    test = df[~msk]
    X = test[test.columns[:-1]]
    Y = test[test.columns[-1]]
    return train, test, X, Y

train, test, X, Y = split(df)

**Prior probability, mean, variance for classes**

In [4]:
p_prob = train['class'].value_counts() / len(train)
mean = train.groupby(['class']).mean()
var = train.groupby(['class']).var()

**Classifaction of test data**

In [5]:
def n_bayes(test):   
    probabilities = dict()
    for i in p_prob.index:
        gauss = 1 / (np.sqrt(2 * np.pi * var.ix[i])) * np.exp(-0.5 *((test-mean.ix[i])**2) / var.ix[i])  
    
        probabilities[i] = gauss.product(axis = 1) * p_prob[i]
    
    predictions = list(pd.DataFrame(probabilities).idxmax(axis=1))
    return predictions

In [6]:
prediction = n_bayes(X)
print(prediction)

[0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2]


**Sklearn comp.**

In [7]:
comp = GaussianNB()
comp.fit(X, Y)
prediction_s = comp.predict(X)
print(prediction_s)

[0 0 0 0 0 0 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2 2 2]
