# Problem Statement

1. Implement Simple Naïve Bayes classification algorithm using Python/R on iris.csv
dataset.

2. Compute Confusion matrix to find TP, FP, TN, FN, Accuracy, Error rate, Precision, Recall
on the given dataset.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load the Iris dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
iris_df = pd.read_csv(url, names=column_names)

# Splitting data into features and target variable
X = iris_df.drop('class', axis=1)
y = iris_df['class']

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

  from pandas.core import (


In [2]:
# 1. Implement Simple Naïve Bayes classification algorithm
# Initialize the Gaussian Naive Bayes classifier
naive_bayes = GaussianNB()

# Fit the model to the training data
naive_bayes.fit(X_train, y_train)

# Predict the classes for the test set
y_pred = naive_bayes.predict(X_test)

In [7]:
import numpy as np

# Counting the number of classes
num_classes = len(np.unique(y_test))

# Computing the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# If binary classification, unpack the values
if num_classes == 2:
    tn, fp, fn, tp = conf_matrix.ravel()
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    error_rate = 1 - accuracy
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * (precision * recall) / (precision + recall)
else:
    # For multi-class classification, compute micro-average metrics
    accuracy = accuracy_score(y_test, y_pred)
    error_rate = 1 - accuracy
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

print("Confusion Matrix:")
print(conf_matrix)
print("True Positives:", conf_matrix[0,0])
print("False Positives:", conf_matrix[0,1])
print("True Negatives:", conf_matrix[1,0])
print("False Negatives:", conf_matrix[1,1])
print("Accuracy:", accuracy)
print("Error Rate:", error_rate)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Confusion Matrix:
[[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
True Positives: 19
False Positives: 0
True Negatives: 0
False Negatives: 12
Accuracy: 0.9777777777777777
Error Rate: 0.022222222222222254
Precision: 0.9793650793650793
Recall: 0.9777777777777777
F1 Score: 0.9777448559670783
