**1) Implement Naïve Bayes classifier for following datasets and evaluate the classification
performance. Draw the confusion matrix, compute accuracy, error and other measures as
applicable.
a. The enjoy sports dataset as. given below**

In [8]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

class NaiveBayes:
    def __init__(self):
        self.class_probs = defaultdict(float)
        self.class_feature_probs = defaultdict(lambda: defaultdict(float))

    def fit(self, X, y):
        num_samples = len(y)
        unique_classes = np.unique(y)

        for c in unique_classes:
            self.class_probs[c] = np.sum(y == c) / num_samples

        for c in unique_classes:
            X_c = X[y == c]
            num_samples_c = len(X_c)

            for feature in range(X.shape[1]):
                unique_feature_values = np.unique(X[:, feature])

                for value in unique_feature_values:
                    count_feature_given_class = np.sum(X_c[:, feature] == value)
                    self.class_feature_probs[c][value, feature] = count_feature_given_class / num_samples_c

    def predict(self, X):
        predictions = []

        for sample in X:
            max_prob = -1
            predicted_class = None

            for c, class_prob in self.class_probs.items():
                feature_probs = np.array([self.class_feature_probs[c][sample[feature], feature]
                                          for feature in range(len(sample))])
                prob = np.prod(feature_probs) * class_prob

                if prob > max_prob:
                    max_prob = prob
                    predicted_class = c

            predictions.append(predicted_class)

        return predictions

df = pd.read_csv("weatherdata.csv")

X = df.drop(['Answer'], axis=1).values
y = df['Answer'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

nb = NaiveBayes()
nb.fit(X_train, y_train)

predictions = nb.predict(X_test)

accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_matrix)

error_rate = 1 - accuracy
print("Error Rate:", error_rate)


Accuracy: 0.0
Confusion Matrix:
[[0 3 0]
 [0 0 0]
 [0 1 0]]
Error Rate: 1.0


**2) Implement Naïve Bayes classifier for following datasets and evaluate the classification performance. Draw the confusion matrix, compute accuracy, error and other measures as applicable.b. The Iris dataset**

In [10]:
import numpy as np
from collections import defaultdict
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

class NaiveBayes:
    def __init__(self):
        self.class_probs = defaultdict(float)
        self.class_feature_probs = defaultdict(lambda: defaultdict(float))

    def fit(self, X, y):
        num_samples = len(y)
        unique_classes = np.unique(y)

        # Calculate class probabilities
        for c in unique_classes:
            self.class_probs[c] = np.sum(y == c) / num_samples

        for c in unique_classes:
            X_c = X[y == c]
            num_samples_c = len(X_c)

            for feature in range(X.shape[1]):
                unique_feature_values = np.unique(X[:, feature])

                for value in unique_feature_values:
                    count_feature_given_class = np.sum(X_c[:, feature] == value)
                    self.class_feature_probs[c][value, feature] = count_feature_given_class / num_samples_c

    def predict(self, X):
        predictions = []

        for sample in X:
            max_prob = -1
            predicted_class = None

            for c, class_prob in self.class_probs.items():
                feature_probs = np.array([self.class_feature_probs[c][sample[feature], feature]
                                          for feature in range(len(sample))])
                prob = np.prod(feature_probs) * class_prob

                if prob > max_prob:
                    max_prob = prob
                    predicted_class = c

            predictions.append(predicted_class)

        return predictions

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

nb = NaiveBayes()
nb.fit(X_train, y_train)

predictions = nb.predict(X_test)

accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

predictions = nb.predict(X_test)

conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_matrix)

error_rate = 1 - accuracy
print("Error Rate:", error_rate)


Accuracy: 0.8333333333333334
Confusion Matrix:
[[10  0  0]
 [ 2  6  1]
 [ 2  0  9]]
Error Rate: 0.16666666666666663
