In [1]:
import os

import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Step 1: Load the dataset
# The default location is an absolute path on the original author's machine.
# To run the notebook elsewhere without editing it, set the IRIS_CSV
# environment variable to the CSV's location; an unset or empty variable
# falls back to the default below.
DEFAULT_IRIS_CSV = r"C:\Users\comp\Documents\DSBDA PRACTICALS\DSBDA\6th_practical\iris.csv"
file_path = os.environ.get("IRIS_CSV") or DEFAULT_IRIS_CSV
df = pd.read_csv(file_path)
print("Dataset Preview:")
print(df.head())

# Step 2: Separate the feature columns from the label column
# Every column but the last is a feature; the last column holds the label
# (species in the preview printed above).
n_feature_cols = df.shape[1] - 1
X = df.iloc[:, :n_feature_cols]
y = df.iloc[:, n_feature_cols]

# Step 3: Hold out part of the rows for evaluation
# A fixed random_state makes the shuffle, and therefore the split, repeatable.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Step 4: Fit a Gaussian Naive Bayes classifier on the training rows
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = GaussianNB().fit(X_train, y_train)

# Step 5: Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Step 6: Confusion Matrix
# Rows are true classes and columns are predicted classes, both ordered by the
# first appearance of each label in the full label column.
class_labels = y.unique()
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
print("\nConfusion Matrix:", cm, sep="\n")

# Summary metrics
# Individual TP/FP/FN/TN counts only have a single meaning for binary problems;
# with several classes they exist per class. Rather than unpacking the matrix by
# hand, the scores are taken from sklearn, with 'macro' averaging precision and
# recall as the unweighted mean of the per-class values.
macro_avg = {"average": "macro"}

accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy  # share of held-out rows that were misclassified
precision = precision_score(y_test, y_pred, **macro_avg)
recall = recall_score(y_test, y_pred, **macro_avg)

print()
print(f"Accuracy: {accuracy:.2f}")
print(f"Error Rate: {error_rate:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")


Dataset Preview:
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa

Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

Accuracy: 1.00
Error Rate: 0.00
Precision: 1.00
Recall: 1.00
