In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 1: Load the Iris dataset
iris = datasets.load_iris()
X, y = iris.data, iris.target  # feature matrix, class codes
feature_names, target_names = iris.feature_names, iris.target_names

# Build one table (features + numeric target + readable species name)
# and write it out as CSV without the index column.
iris_df = pd.DataFrame(
    data=np.c_[X, y],
    columns=[*feature_names, 'target'],
)
# The target sits in the same stacked array as the features, so it is cast
# back to int before being used to index the class-name array.
iris_df['species'] = iris_df['target'].apply(
    lambda class_code: target_names[int(class_code)]
)
iris_df.to_csv('iris_dataset.csv', index=False)
print("Dataset saved to iris_dataset.csv")

# Step 2: Split the dataset into training and testing sets
# 30% of the samples are held out; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print(f"Training set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")

# Step 3: Implement Naive Bayesian Classifier (Gaussian Naive Bayes)
# fit() hands back the fitted estimator, so creation and training are chained.
gnb = GaussianNB().fit(X_train, y_train)

# Step 4: Make predictions on test data
y_pred = gnb.predict(X_test)

# Step 5: Compute accuracy and other metrics
# All three metrics compare the held-out labels against the predictions.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(
    y_test, y_pred, target_names=target_names
)

# Display results
print("\nNaive Bayes Classifier Results:")
print(f"Accuracy: {accuracy:.4f}")

# Each remaining section is a heading line followed by its payload.
report_sections = (
    ("\nConfusion Matrix:", conf_matrix),
    ("\nClassification Report:", class_report),
)
for heading, payload in report_sections:
    print(heading)
    print(payload)

# Step 6: Display correct and incorrect predictions
# Each entry is (position within the test set, true class name,
# predicted class name). The position is an index into y_test / y_pred,
# not into the original, unsplit dataset.
correct_predictions = []
wrong_predictions = []

# Walk the true and predicted labels in lockstep instead of indexing both
# arrays by position.
for i, (true_label, pred_label) in enumerate(zip(y_test, y_pred)):
    record = (i, target_names[true_label], target_names[pred_label])
    if true_label == pred_label:
        correct_predictions.append(record)
    else:
        wrong_predictions.append(record)

print("\nCorrect Predictions (Index, True Class, Predicted Class):")
for sample_idx, true_class, pred_class in correct_predictions:
    print(f"  Sample {sample_idx}: Actual={true_class}, Predicted={pred_class}")

print("\nWrong Predictions (Index, True Class, Predicted Class):")
if wrong_predictions:
    for sample_idx, true_class, pred_class in wrong_predictions:
        print(f"  Sample {sample_idx}: Actual={true_class}, Predicted={pred_class}")
else:
    print("  None - All predictions are correct!")

# Step 7: Visualize the results
# Two side-by-side scatter plots of the test samples over the first two
# features: the left panel is coloured by true class, the right by predicted
# class.
#
# Both panels are pinned to the same colour range (one id per entry in
# target_names). Without explicit vmin/vmax each scatter scales its colours
# to the range of its own `c` values, so a class missing from the predictions
# would give the same class different colours in the two panels.
class_ids = list(range(len(target_names)))
panels = [
    # (title, class labels used for colouring, panel-specific marker styling)
    ('Actual Classes', y_test, {'edgecolor': 'k'}),
    ('Predicted Classes', y_pred, {'marker': 'x'}),
]

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
for ax, (title, labels, marker_style) in zip(axes, panels):
    points = ax.scatter(
        X_test[:, 0], X_test[:, 1],
        c=labels, cmap='viridis', s=60,
        vmin=class_ids[0], vmax=class_ids[-1],
        **marker_style,
    )
    # Classes are discrete, so tick the colourbar only at the class ids.
    fig.colorbar(points, ax=ax, label='Class', ticks=class_ids)
    ax.set_xlabel(feature_names[0])
    ax.set_ylabel(feature_names[1])
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()
