In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

class NaiveBayesClassifier:
    def __init__(self):
        self.class_probs = {}
        self.feature_probs = {}

    def fit(self, X, y):
        num_samples, num_features = X.shape
        unique_classes = np.unique(y)

        for c in unique_classes:
            # Calculate class probabilities
            self.class_probs[c] = np.sum(y == c) / num_samples

            # Calculate feature probabilities for each class
            features_given_class = X[y == c]
            self.feature_probs[c] = np.sum(features_given_class, axis=0) / np.sum(y == c)

    def predict(self, X):
        predictions = []

        for sample in X:
            class_scores = {}

            for c, class_prob in self.class_probs.items():
                # Calculate the probability of the sample belonging to each class
                feature_probs_given_class = self.feature_probs[c]
                log_prob = np.sum(np.log(sample * feature_probs_given_class + (1 - sample) * (1 - feature_probs_given_class)))
                class_scores[c] = np.log(class_prob) + log_prob

            # Predict the class with the highest probability
            predicted_class = max(class_scores, key=class_scores.get)
            predictions.append(predicted_class)

        return predictions

data = pd.read_csv("play_tennis.csv")

# Convert categorical features to numerical values using label encoding
label_encoder = LabelEncoder()
for column in data.select_dtypes(include=['object']).columns:
    data[column] = label_encoder.fit_transform(data[column])

X = data.drop('play', axis=1).values
y = data['play'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(X_train, y_train)
predictions = nb_classifier.predict(X_test)

accuracy = accuracy_score(y_test, predictions)
print("Predictions:", predictions)
print("Accuracy:", accuracy)

from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,predictions)
tp=cm[0][0]
fp=cm[0][1]
fn=cm[1][0]
tn=cm[1][1]
print(tp)
print(fp)
print(fn)
print(tn)
accuracy=(tp+tn)/(tp+fp+tn+fn)
print(accuracy)

Predictions: [1, 0, 0]
Accuracy: 0.6666666666666666
1
0
1
1
0.6666666666666666
