In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

from Convert import convert_to_dataframe, single_y_test_pred

In [2]:
iris = convert_to_dataframe(load_iris())

In [3]:
iris.sample(10)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,target
37,4.9,3.6,1.4,0.1,setosa
86,6.7,3.1,4.7,1.5,versicolor
69,5.6,2.5,3.9,1.1,versicolor
132,6.4,2.8,5.6,2.2,virginica
68,6.2,2.2,4.5,1.5,versicolor
105,7.6,3.0,6.6,2.1,virginica
134,6.1,2.6,5.6,1.4,virginica
100,6.3,3.3,6.0,2.5,virginica
29,4.7,3.2,1.6,0.2,setosa
57,4.9,2.4,3.3,1.0,versicolor


In [4]:
X = iris.drop(['target'], axis=1)
Y = iris['target']
feature_names = iris.columns.values.tolist()[:-1]
class_names = Y.unique().tolist()
print(feature_names)
print(class_names)

['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
['setosa', 'versicolor', 'virginica']


In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

In [6]:
x_train.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
129,7.2,3.0,5.8,1.6
34,4.9,3.1,1.5,0.2
58,6.6,2.9,4.6,1.3
4,5.0,3.6,1.4,0.2
149,5.9,3.0,5.1,1.8


In [7]:
clf = GaussianNB()

In [8]:
clf = clf.fit(x_train, y_train)

In [9]:
y_pred = clf.predict(x_test)

In [10]:
print(single_y_test_pred(y_test, y_pred))

    index      target      y_pred
0      42      setosa      setosa
1     111   virginica   virginica
2      49      setosa      setosa
3       9      setosa      setosa
4      25      setosa      setosa
5     141   virginica   virginica
6      78  versicolor  versicolor
7      19      setosa      setosa
8     124   virginica   virginica
9      30      setosa      setosa
10     31      setosa      setosa
11     18      setosa      setosa
12     16      setosa      setosa
13    121   virginica   virginica
14     92  versicolor  versicolor
15     76  versicolor  versicolor
16    145   virginica   virginica
17    140   virginica   virginica
18     75  versicolor  versicolor
19      1      setosa      setosa
20     43      setosa      setosa
21    107   virginica   virginica
22     61  versicolor  versicolor
23     96  versicolor  versicolor
24     51  versicolor  versicolor
25     38      setosa      setosa
26    137   virginica   virginica
27     21      setosa      setosa
28     82  ver

In [11]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00         8

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [12]:
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_pred, labels=class_names))

Confusion matrix:
[[13  0  0]
 [ 0  9  0]
 [ 0  0  8]]


In [13]:
accuracy_test = metrics.accuracy_score(y_test, y_pred) * 100
accuracy_train = metrics.accuracy_score(y_train, clf.predict(x_train)) * 100

print(f"Accuracy: {round(accuracy_test, 2)}% on Test Data")
print(f"Accuracy: {round(accuracy_train, 2)}% on Training Data")

Accuracy: 100.0% on Test Data
Accuracy: 94.17% on Training Data


In [14]:
clf.score(x_test, y_test)

1.0