In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features, built from raw frequencies.

    ``fit`` stores the empirical prior of every class and, for each class and
    feature column, the relative frequency of every observed value.
    ``predict`` scores a sample for each class as the log prior plus the sum of
    the per-feature log likelihoods and returns the best-scoring class.

    Parameters
    ----------
    unseen_prob : float, default 1e-5
        Fixed likelihood used for a feature value that never occurred together
        with a class in the training data.  This is a probability floor, not
        Laplace smoothing: the stored frequencies themselves are unsmoothed.

    Attributes
    ----------
    class_probs : dict
        Maps class label -> prior probability.
    feature_probs : dict
        Maps class label -> {feature index -> {value -> relative frequency}}.

    Raises
    ------
    ValueError
        If ``unseen_prob`` is not in the interval (0, 1].
    """

    def __init__(self, unseen_prob=1e-5):
        if not 0 < unseen_prob <= 1:
            raise ValueError("unseen_prob must be in the interval (0, 1]")
        self.unseen_prob = unseen_prob
        self.class_probs = {}
        self.feature_probs = {}

    @staticmethod
    def _as_array(data):
        """Return ndarrays untouched; wrap anything else in an object ndarray."""
        if isinstance(data, np.ndarray):
            return data
        # dtype=object keeps the original Python values, so mixed-type rows
        # (e.g. strings and ints) are not silently coerced to strings.
        return np.asarray(data, dtype=object)

    def fit(self, X, y):
        """Estimate class priors and per-class feature value frequencies.

        Any model learned by an earlier call is discarded, so the classifier
        only ever knows the classes present in the most recent ``y``.

        Parameters
        ----------
        X : array-like, 2-dimensional (samples x features)
            Categorical feature values.
        y : array-like, 1-dimensional
            Class label of each row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-dimensional.
        """
        X = self._as_array(X)
        y = self._as_array(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        num_samples, num_features = X.shape

        # Build into fresh dicts and publish them only at the end: a refit must
        # not keep stale classes, and a failed fit must not leave a half model.
        class_probs = {}
        feature_probs = {}

        for c in np.unique(y):
            in_class = y == c
            features_given_class = X[in_class]
            class_probs[c] = np.sum(in_class) / num_samples

            class_feature_probs = {}
            for feature_idx in range(num_features):
                unique_values, counts = np.unique(
                    features_given_class[:, feature_idx], return_counts=True
                )
                class_feature_probs[feature_idx] = dict(
                    zip(unique_values, counts / len(features_given_class))
                )
            feature_probs[c] = class_feature_probs

        self.class_probs = class_probs
        self.feature_probs = feature_probs

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like, 2-dimensional (samples x features)
            Rows to classify; columns must be in the order used in ``fit``.

        Returns
        -------
        list
            One predicted label per row.  Ties go to the class that comes
            first in ``class_probs``.  An empty ``X`` yields ``[]``.

        Raises
        ------
        ValueError
            If the classifier has not been fitted, or ``X`` is non-empty and
            not 2-dimensional.
        """
        X = self._as_array(X)
        if X.ndim >= 1 and X.shape[0] == 0:
            return []
        if not self.class_probs:
            raise ValueError(
                "NaiveBayesClassifier is not fitted yet; call fit() before predict()"
            )
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")

        # Loop-invariant logs are computed once instead of once per sample.
        log_unseen = np.log(self.unseen_prob)
        log_priors = {c: np.log(p) for c, p in self.class_probs.items()}

        predictions = []
        for sample in X:
            class_scores = {}

            for c, log_prior in log_priors.items():
                class_feature_probs = self.feature_probs[c]
                log_prob = 0

                for feature_idx, value in enumerate(sample):
                    prob = class_feature_probs[feature_idx].get(value)
                    # A value never seen with this class gets the fixed floor
                    # instead of log(0) = -inf.
                    log_prob += log_unseen if prob is None else np.log(prob)

                class_scores[c] = log_prior + log_prob

            predictions.append(max(class_scores, key=class_scores.get))

        return predictions

# Read the play-tennis table and discard the 'day' column before modelling.
data = pd.read_csv("play_tennis.csv").drop(columns='day')

# 'play' is the target; every remaining column is used as a feature.
y = data['play'].values
X = data.drop(columns='play').values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the classifier on the training rows, then label the held-out rows.
nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(X_train, y_train)
predictions = nb_classifier.predict(X_test)

# Report the predicted labels and the fraction that match y_test.
accuracy = accuracy_score(y_test, predictions)
print("Predictions:", predictions)
print("Accuracy:", accuracy)


Predictions: ['Yes', 'Yes', 'Yes']
Accuracy: 0.6666666666666666


In [19]:
# Show the held-out feature rows that were passed to predict().
X_test

array([['Rain', 'Mild', 'Normal', 'Weak'],
       ['Overcast', 'Mild', 'High', 'Strong'],
       ['Sunny', 'Hot', 'High', 'Weak']], dtype=object)

In [20]:
# Show the true labels of the held-out rows, i.e. what accuracy_score compared the predictions against.
y_test

array(['Yes', 'Yes', 'No'], dtype=object)