In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder

In [22]:
# Read the Iris CSV into a DataFrame; every later cell works from `iris_data`.
iris_csv_path = '/content/Iris.csv'
iris_data = pd.read_csv(iris_csv_path)

In [23]:
# Drop the Id column: it is a row identifier, not a feature.
# errors='ignore' keeps this cell idempotent -- because `iris_data` is
# reassigned in place, re-running the cell would otherwise raise KeyError
# once 'Id' has already been removed.
iris_data = iris_data.drop(columns='Id', errors='ignore')

In [24]:
# Replace the Species column with the integer codes produced by LabelEncoder.
# The fitted encoder is kept in `label_encoder` so the codes can be mapped back.
label_encoder = LabelEncoder()
species_codes = label_encoder.fit_transform(iris_data['Species'])
iris_data['Species'] = species_codes

In [25]:
# Features (X) are every column except the target; the target (y) is Species.
X = iris_data.drop(columns=['Species'])
y = iris_data.loc[:, 'Species']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
# Build the Gaussian Naive Bayes classifier and fit it on the training split
# (fit() returns the estimator itself, so construction and training chain).
nb_classifier = GaussianNB().fit(X_train, y_train)

# Predicted class codes for the held-out test rows
y_pred = nb_classifier.predict(X_test)

In [27]:
# Confusion matrix of true test labels against predictions
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")

# Overall accuracy and its complement, the error rate
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy

# The target is multi-class, so precision/recall need an averaging strategy;
# the same one is passed to both scorers.
averaging = {'average': 'weighted'}
precision = precision_score(y_test, y_pred, **averaging)
recall = recall_score(y_test, y_pred, **averaging)

Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [28]:
# Report the overall metrics, one per line, rounded to four decimals.
print(
    "\nMetrics:",
    f"Accuracy: {accuracy:.4f}",
    f"Error Rate: {error_rate:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    sep="\n",
)


Metrics:
Accuracy: 1.0000
Error Rate: 0.0000
Precision: 1.0000
Recall: 1.0000


In [29]:
# One-vs-rest view of the multi-class confusion matrix, treating class 0
# (setosa) as the positive class and every other class as negative.
#
# scikit-learn's confusion_matrix puts TRUE labels on the rows and PREDICTED
# labels on the columns: cm[i, j] counts samples whose true class is i and
# whose predicted class is j. Hence, for class 0:
tp = cm[0, 0]           # true 0,     predicted 0
fn = cm[0, 1:].sum()    # true 0,     predicted as another class (missed positives)
fp = cm[1:, 0].sum()    # true other, predicted 0 (false alarms)
tn = cm[1:, 1:].sum()   # true other, predicted other

print("\nFor class 0 (Setosa):")
print(f"True Positives (TP): {tp}")
print(f"False Positives (FP): {fp}")
print(f"True Negatives (TN): {tn}")
print(f"False Negatives (FN): {fn}")


For class 0 (Setosa):
True Positives (TP): 10
False Positives (FP): 0
True Negatives (TN): 20
False Negatives (FN): 0


In [30]:
# One-vs-rest metrics for class 0, derived from the tp / tn / fp / fn counts.
total_samples = tp + tn + fp + fn
predicted_positive = tp + fp
actual_positive = tp + fn

binary_accuracy = (tp + tn) / total_samples

# Precision and recall fall back to 0 when their denominator is empty, to
# avoid dividing by zero.
if predicted_positive != 0:
    binary_precision = tp / predicted_positive
else:
    binary_precision = 0

if actual_positive != 0:
    binary_recall = tp / actual_positive
else:
    binary_recall = 0

print(
    f"Binary Accuracy: {binary_accuracy:.4f}",
    f"Binary Precision: {binary_precision:.4f}",
    f"Binary Recall: {binary_recall:.4f}",
    sep="\n",
)

Binary Accuracy: 1.0000
Binary Precision: 1.0000
Binary Recall: 1.0000
