## import libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB

### read csv

In [6]:
df_train = pd.read_csv("data/iris/iris_train.csv", delimiter=',', header=None, names=[i for i in range(5)], index_col=False)

In [7]:
df_test = pd.read_csv("data/iris/iris_test.csv", delimiter=',', header=None, names=[i for i in range(5)], index_col=False)

### exploratory

In [8]:
df_train.head(5)

Unnamed: 0,0,1,2,3,4
0,7.7,3.8,6.7,2.2,2
1,5.0,3.4,1.6,0.4,0
2,6.7,3.0,5.0,1.7,1
3,5.9,3.0,4.2,1.5,1
4,6.6,2.9,4.6,1.3,1


In [9]:
df_test.head(5)

Unnamed: 0,0,1,2,3,4
0,5.6,2.9,3.6,1.3,1
1,6.4,3.2,4.5,1.5,1
2,5.1,3.8,1.9,0.4,0
3,4.6,3.2,1.4,0.2,0
4,5.2,3.5,1.5,0.2,0


### get features

In [10]:
x_train = df_train.iloc[:,:-1]
x_train.head(5)

Unnamed: 0,0,1,2,3
0,7.7,3.8,6.7,2.2
1,5.0,3.4,1.6,0.4
2,6.7,3.0,5.0,1.7
3,5.9,3.0,4.2,1.5
4,6.6,2.9,4.6,1.3


In [11]:
x_test = df_test.iloc[:,:-1]
x_test.head(5)

Unnamed: 0,0,1,2,3
0,5.6,2.9,3.6,1.3
1,6.4,3.2,4.5,1.5
2,5.1,3.8,1.9,0.4
3,4.6,3.2,1.4,0.2
4,5.2,3.5,1.5,0.2


### get labels

In [12]:
y_train = df_train.iloc[:,-1:]
y_train.head(5)

Unnamed: 0,4
0,2
1,0
2,1
3,1
4,1


In [13]:
y_test = df_test.iloc[:,-1:]
y_test.head(5)

Unnamed: 0,4
0,1
1,1
2,0
3,0
4,0


### data normalization

#### min max scale

In [14]:
x_train = (x_train-x_train.min())/(x_train.max()-x_train.min())

In [15]:
x_train.head(5)

Unnamed: 0,0,1,2,3
0,0.944444,0.75,0.965517,0.875
1,0.194444,0.583333,0.086207,0.125
2,0.666667,0.416667,0.672414,0.666667
3,0.444444,0.416667,0.534483,0.583333
4,0.638889,0.375,0.603448,0.5


In [16]:
x_test = (x_test-x_test.min())/(x_test.max()-x_test.min())

In [17]:
x_test.head()

Unnamed: 0,0,1,2,3
0,0.363636,0.35,0.45614,0.5
1,0.606061,0.5,0.614035,0.583333
2,0.212121,0.8,0.157895,0.125
3,0.060606,0.5,0.070175,0.041667
4,0.242424,0.65,0.087719,0.041667


### training

In [18]:
classifier = GaussianNB()
classifier.fit(x_train, y_train.values.ravel())

GaussianNB(priors=None, var_smoothing=1e-09)

In [19]:
y_pred = classifier.predict(x_test) 

### evaluate

In [20]:
from sklearn.metrics import accuracy_score, confusion_matrix
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

0.92
[[17  0  0]
 [ 0 15  0]
 [ 0  4 14]]
