# Import libraries

In [2]:
import numpy as np 
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.metrics import accuracy_score

# Load and prepare the iris dataset

In [3]:
# Load the iris dataset
iris = datasets.load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df["species"] = iris.target
df["species"] = df["species"].map({0:"sentosa", 1:"versicolor", 2:"vorginica"})
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,sentosa
1,4.9,3.0,1.4,0.2,sentosa
2,4.7,3.2,1.3,0.2,sentosa
3,4.6,3.1,1.5,0.2,sentosa
4,5.0,3.6,1.4,0.2,sentosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,vorginica
146,6.3,2.5,5.0,1.9,vorginica
147,6.5,3.0,5.2,2.0,vorginica
148,6.2,3.4,5.4,2.3,vorginica


* Features = sepal length, sepal width, petal length, petal width
* Target = species (sentosa, versicolor, virginica)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   species            150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [5]:
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


# Separate Features (x) and Target (y)

In [6]:
# define independent variable (Features) and dependent variable (Target)
x = df.drop(columns=["species"])  # Features
y = df["species"]  # Target labels

* x contains only the numeric feature columns, while y contains the species.

# Split data intp Train and Test sets (80:20)

In [9]:
# Splitting the data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Train and calculate GaussianNB

In [18]:
# Initialize and train GaussianNB
gnb = GaussianNB()
gnb.fit(x_train, y_train)

In [17]:
# predict on the test
y_pred_gnb = gnb.predict(x_test)

# calculate accuracy
accuracy = accuracy_score(y_test,y_pred_gnb)
print("GaussianNB Accuracy: ",accuracy)

GaussianNB Accuracy:  1.0


# Train and Evaluate Bernoulli Naive Bayes

* since BernoulliNB is meant for binary data, we convert our dataset to binary form.

In [12]:
# Initialize Bernoulli Naive Bayes model
bnb = BernoulliNB()

# Train the model
bnb.fit(x_train, y_train)

In [13]:
# predict on the test
y_pred_bnb = gnb.predict(x_test)

# calculate accuracy
accuracy = accuracy_score(y_test,y_pred_gnb)
print("BernoulliNB: ",accuracy)

BernoulliNB:  1.0


# Train and Evaluate Multinomial Naive Bayes

In [15]:
# Initialize Multinomial Naive Bayes model
mnb = MultinomialNB()

# Train the model
mnb.fit(x_train, y_train)

In [16]:
# Predict on the test
y_pred_mnb = mnb.predict(x_test)

# calculate accuracy
accuracy = accuracy_score(y_test,y_pred_mnb)
print("MultinomialNB: ",accuracy)

MultinomialNB:  0.9
