### 1. Implement Naïve Bayes classifier for following datasets and evaluate the classification performance. Draw the confusion matrix, compute accuracy, error and other measures as applicable.

In [10]:
def calculate_class_probabilities(dataset):
    class_probabilities = {}
    total_samples = len(dataset)
    
    for instance in dataset:
        decision = instance[-1]
        if decision not in class_probabilities:
            class_probabilities[decision] = 0
        class_probabilities[decision] += 1
    
    for decision in class_probabilities:
        class_probabilities[decision] /= total_samples
    
    return class_probabilities

def calculate_attribute_probabilities(dataset):
    attribute_probabilities = {}
    total_samples = len(dataset)
    
    for instance in dataset:
        for i in range(len(instance)-1):
            attribute = (i, instance[i], instance[-1])  # (attribute index, attribute value, class)
            if attribute not in attribute_probabilities:
                attribute_probabilities[attribute] = 0
            attribute_probabilities[attribute] += 1
    
    for attribute in attribute_probabilities:
        attribute_probabilities[attribute] /= total_samples
    
    return attribute_probabilities

def predict(class_probabilities, attribute_probabilities, instance):
    predicted_class = None
    max_probability = -1
    
    for decision, class_probability in class_probabilities.items():
        probability = class_probability
        
        for i in range(len(instance)-1):
            attribute = (i, instance[i], decision)
            if attribute in attribute_probabilities:
                probability *= attribute_probabilities[attribute]
        
        if probability > max_probability:
            max_probability = probability
            predicted_class = decision
    
    return predicted_class

def evaluate(predictions, actual):
    confusion_matrix = {'TP': 0, 'TN': 0, 'FP': 0, 'FN': 0}
    
    for pred, act in zip(predictions, actual):
        if pred == 'Yes' and act == 'Yes':
            confusion_matrix['TP'] += 1
        elif pred == 'No' and act == 'No':
            confusion_matrix['TN'] += 1
        elif pred == 'Yes' and act == 'No':
            confusion_matrix['FP'] += 1
        elif pred == 'No' and act == 'Yes':
            confusion_matrix['FN'] += 1
    
    accuracy = (confusion_matrix['TP'] + confusion_matrix['TN']) / (confusion_matrix['TP'] + confusion_matrix['TN'] + confusion_matrix['FP'] + confusion_matrix['FN'])
    error_rate = 1 - accuracy
    
    return confusion_matrix, accuracy, error_rate

Accuracy: 0.0
Confusion Matrix:
[[0 3 0]
 [0 0 0]
 [0 1 0]]
Error Rate: 1.0


### 2.Implement Naïve Bayes classifier for following datasets and evaluate the classification performance. Draw the confusion matrix, compute accuracy, error and other measures as applicable.b. The Iris dataset

In [2]:
import numpy as np
from collections import defaultdict
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

class NaiveBayes:
    def __init__(self):
        self.class_probs = defaultdict(float)
        self.class_feature_probs = defaultdict(lambda: defaultdict(float))

    def fit(self, X, y):
        num_samples = len(y)
        unique_classes = np.unique(y)

        # Calculate class probabilities
        for c in unique_classes:
            self.class_probs[c] = np.sum(y == c) / num_samples

        # Calculate conditional probabilities for each feature given class
        for c in unique_classes:
            X_c = X[y == c]
            num_samples_c = len(X_c)

            for feature in range(X.shape[1]):
                unique_feature_values = np.unique(X[:, feature])

                for value in unique_feature_values:
                    count_feature_given_class = np.sum(X_c[:, feature] == value)
                    self.class_feature_probs[c][value, feature] = count_feature_given_class / num_samples_c

    def predict(self, X):
        predictions = []

        for sample in X:
            max_prob = -1
            predicted_class = None

            for c, class_prob in self.class_probs.items():
                feature_probs = np.array([self.class_feature_probs[c][sample[feature], feature] 
                                          for feature in range(len(sample))])
                prob = np.prod(feature_probs) * class_prob

                if prob > max_prob:
                    max_prob = prob
                    predicted_class = c

            predictions.append(predicted_class)

        return predictions

# Example usage with Iris dataset

# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and train Naive Bayes classifier
nb = NaiveBayes()
nb.fit(X_train, y_train)

# Make predictions
predictions = nb.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

# Make predictions
predictions = nb.predict(X_test)

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate error rate
error_rate = 1 - accuracy
print("Error Rate:", error_rate)



Accuracy: 0.8333333333333334
Confusion Matrix:
[[10  0  0]
 [ 2  6  1]
 [ 2  0  9]]
Error Rate: 0.16666666666666663
