In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [8]:
df = pd.read_csv('iris.csv')

In [9]:
df

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


In [10]:
df.columns =['sepal_length', 'sepal_width','petal_length','petal_width','flower_class']
df.loc[len(df.index)]=[5.1,3.5,1.4,0.2,'Iris-setosa'] #Since 1st row was replaced by column names.
new_df = pd.DataFrame(df)
new_df['flower_class'] = df.flower_class.astype('category')
new_df['flower_class']= new_df['flower_class'].cat.codes

In [11]:
new_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,flower_class
0,4.9,3.0,1.4,0.2,0
1,4.7,3.2,1.3,0.2,0
2,4.6,3.1,1.5,0.2,0
3,5.0,3.6,1.4,0.2,0
4,5.4,3.9,1.7,0.4,0
...,...,...,...,...,...
145,6.3,2.5,5.0,1.9,2
146,6.5,3.0,5.2,2.0,2
147,6.2,3.4,5.4,2.3,2
148,5.9,3.0,5.1,1.8,2


In [12]:
features = new_df.drop(columns=['flower_class']).values
target = new_df['flower_class'].values

In [13]:
print(target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 0]


In [14]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(features,target, test_size=.2, )

In [15]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(x_train,y_train)

In [16]:
pred = gnb.predict(x_test)

In [17]:
from sklearn.metrics import accuracy_score

acc  = accuracy_score(pred,y_test)
print(f"Accuracy : {acc*100}")

Accuracy : 100.0


In [18]:
from sklearn.metrics import precision_score, confusion_matrix,recall_score,f1_score

cm = confusion_matrix(y_test, pred)
precision = precision_score(y_test, pred,average='macro')
recall = recall_score(y_test, pred,average='macro')
f1 = f1_score(y_test,pred,average='macro')

In [19]:
print(cm)
print(precision)
print(recall)
print(f1)

[[ 9  0  0]
 [ 0 12  0]
 [ 0  0  9]]
1.0
1.0
1.0


In [20]:
from sklearn.metrics import roc_curve

In [21]:
proba = gnb.predict_proba(x_test)

In [22]:
setosa, virginica, versicolor = new_df.groupby('flower_class')


In [23]:
ytrue_arr= np.array(y_test).reshape(-1,1)
prob_arr= np.array(proba).reshape(-1,1)

In [24]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,flower_class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica
148,5.9,3.0,5.1,1.8,Iris-virginica


In [25]:
X,y  = np.array( df.drop(columns=['flower_class'])), np.array(df['flower_class'])

In [26]:
Xtrain,Xtest, Ytrain,Ytest = train_test_split(X,y, test_size=0.2)

In [27]:
classifier = GaussianNB()
y_score = classifier.fit(Xtrain,Ytrain).predict_proba(Xtest)

In [28]:
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(Ytrain)
y_onehot_test = lb.transform(Ytest)


In [29]:
y_onehot_test.shape

(30, 3)

In [30]:
lb.transform(["Iris-versicolor"])

array([[0, 1, 0]])

In [31]:
class_of_interest = "Iris-virginica"
class_id = np.flatnonzero(lb.classes_ == class_of_interest)[0]
class_id

2

In [32]:
y_score

array([[9.80032780e-089, 9.99465691e-001, 5.34309196e-004],
       [1.98665074e-147, 2.71379329e-002, 9.72862067e-001],
       [1.00000000e+000, 3.26806658e-015, 5.49090718e-024],
       [9.23199498e-130, 7.67680737e-001, 2.32319263e-001],
       [1.01816009e-198, 5.57032482e-007, 9.99999443e-001],
       [2.41799472e-090, 9.99391584e-001, 6.08416304e-004],
       [1.00000000e+000, 1.33516251e-010, 3.16489348e-019],
       [1.00000000e+000, 5.34782649e-015, 3.33823913e-023],
       [2.84080868e-120, 9.40384654e-001, 5.96153464e-002],
       [3.38212775e-187, 9.79293059e-006, 9.99990207e-001],
       [9.17066206e-082, 9.99907562e-001, 9.24382956e-005],
       [1.48240242e-148, 3.97214797e-002, 9.60278520e-001],
       [1.41793609e-151, 4.23347994e-002, 9.57665201e-001],
       [1.00000000e+000, 2.42411517e-017, 4.43488433e-025],
       [1.81766129e-111, 8.46428361e-001, 1.53571639e-001],
       [1.09643604e-225, 1.78489896e-006, 9.99998215e-001],
       [3.06616447e-131, 2.99411046e-001

In [33]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
#from yellowbrick.classifier import PrecisionRecallCurve


In [34]:
X = OrdinalEncoder().fit_transform(new_df.drop(columns=['flower_class']))
encoder  = LabelEncoder()
Y = encoder.fit_transform(target)

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

In [36]:
viz = PrecisionRecallCurve(
    MultinomialNB(), 
    classes = encoder.classes_,
    colors=["purple", "cyan", "blue"],
    iso_f1_curves=True,
    per_class=True,
    micro=False
)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()


NameError: name 'PrecisionRecallCurve' is not defined