In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict

In [19]:
# Weather / play-golf observations, written one record per row.
# Every field, including WINDY, is kept as a string category.
column_names = ['WEATHER', 'TEMPERATURE', 'HUMIDITY', 'WINDY', 'PLAY_GOLF']
observations = [
    ('Rainy', 'Hot', 'High', 'False', 'No'),
    ('Rainy', 'Hot', 'High', 'True', 'No'),
    ('Overcast', 'Hot', 'High', 'False', 'Yes'),
    ('Sunny', 'Mild', 'High', 'False', 'Yes'),
    ('Sunny', 'Cool', 'Normal', 'False', 'Yes'),
    ('Sunny', 'Cool', 'Normal', 'True', 'No'),
    ('Overcast', 'Cool', 'Normal', 'True', 'Yes'),
    ('Rainy', 'Mild', 'High', 'False', 'No'),
    ('Rainy', 'Cool', 'Normal', 'False', 'Yes'),
    ('Sunny', 'Mild', 'Normal', 'False', 'Yes'),
    ('Rainy', 'Mild', 'Normal', 'True', 'Yes'),
    ('Overcast', 'Mild', 'High', 'True', 'Yes'),
    ('Overcast', 'Hot', 'Normal', 'False', 'Yes'),
    ('Sunny', 'Mild', 'High', 'True', 'No'),
]

# Transpose the records into the column-name -> list-of-values mapping.
data = {
    name: [record[position] for record in observations]
    for position, name in enumerate(column_names)
}

df = pd.DataFrame(data)
df

Unnamed: 0,WEATHER,TEMPERATURE,HUMIDITY,WINDY,PLAY_GOLF
0,Rainy,Hot,High,False,No
1,Rainy,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Sunny,Mild,High,False,Yes
4,Sunny,Cool,Normal,False,Yes
5,Sunny,Cool,Normal,True,No
6,Overcast,Cool,Normal,True,Yes
7,Rainy,Mild,High,False,No
8,Rainy,Cool,Normal,False,Yes
9,Sunny,Mild,Normal,False,Yes


In [42]:
class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features with add-one smoothing.

    ``fit`` counts how often every (feature, value, label) combination occurs
    and turns the counts into class priors and smoothed conditional
    probabilities.  ``predict`` multiplies the prior of each class by the
    conditional probability of every feature value in a row and returns the
    best-scoring class together with that (unnormalised) score.

    Attributes:
        features: Ordered feature names, one per column of ``X``.  May be
            passed to the constructor or assigned afterwards
            (``nb.features = [...]``).  When still ``None`` at ``fit`` time,
            the column positions ``0..n-1`` are used as names.
        class_counts: label -> number of training rows with that label.
        class_probs: label -> prior probability of the label.
        feature_counts: feature -> value -> label -> raw co-occurrence count.
        feature_probs_given_class: feature -> value -> label -> smoothed
            conditional probability.
        feature_value_counts: feature -> number of distinct values seen for
            that feature during ``fit``.
    """

    def __init__(self, features=None):
        """Create an unfitted classifier.

        Args:
            features: Optional iterable of feature names in column order.
        """
        self.features = list(features) if features is not None else None
        self._reset()

    def _reset(self):
        """Discard every learned statistic so that ``fit`` starts from scratch."""
        self.feature_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.class_counts = defaultdict(int)
        self.class_probs = defaultdict(float)
        self.feature_probs_given_class = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
        self.feature_value_counts = {}

    def fit(self, X, y):
        """Estimate priors and smoothed conditional probabilities.

        Any statistics from a previous ``fit`` call are discarded first, so
        fitting twice on the same data gives the same model.

        Args:
            X: Iterable of rows (a 2-D numpy array or a list of sequences);
                each row holds one categorical value per feature.
            y: Iterable of class labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length, or a row does not
                have exactly one value per feature.
        """
        rows = list(X)
        labels = list(y)
        if len(rows) != len(labels):
            raise ValueError(
                f"X and y must have the same length, got {len(rows)} and {len(labels)}"
            )

        if self.features is None:
            # No names supplied: fall back to positional column indices.
            self.features = list(range(len(rows[0]))) if rows else []
        features = list(self.features)

        for row in rows:
            if len(row) != len(features):
                raise ValueError(
                    f"expected {len(features)} values per row, got {len(row)}"
                )

        # Start from a clean slate; otherwise a second fit() would add its
        # counts on top of the first one.
        self._reset()

        for row, label in zip(rows, labels):
            self.class_counts[label] += 1
            for feature, value in zip(features, row):
                self.feature_counts[feature][value][label] += 1

        total_count = sum(self.class_counts.values())

        # Class priors.
        for label, count in self.class_counts.items():
            self.class_probs[label] = count / total_count

        # Add-one smoothed P(value | label) for every value seen in training.
        for column, feature in enumerate(features):
            feature_values = {row[column] for row in rows}
            self.feature_value_counts[feature] = len(feature_values)
            for value in feature_values:
                for label, class_count in self.class_counts.items():
                    self.feature_probs_given_class[feature][value][label] = (
                        (1 + self.feature_counts[feature][value][label])
                        / (class_count + len(feature_values))
                    )

    def _likelihood(self, feature, value, label):
        """Return the smoothed P(value | label) for one feature.

        A value that never occurred in training is treated as having a count
        of zero, i.e. ``1 / (class_count + n_seen_values)``.  Plain lookups
        are used so that querying never inserts new keys into the model.
        """
        by_value = self.feature_probs_given_class.get(feature)
        if by_value is not None and value in by_value:
            return by_value[value][label]
        n_values = self.feature_value_counts.get(feature, 0)
        return 1 / (self.class_counts[label] + n_values)

    def predict(self, X):
        """Predict the most likely class for every row of ``X``.

        Args:
            X: Iterable of rows, each with one value per feature in the same
                order as ``self.features``.

        Returns:
            A list with one ``(best_label, score)`` tuple per row, where
            ``score`` is the prior times the product of the conditional
            probabilities (not normalised across classes).

        Raises:
            ValueError: If the classifier has not been fitted, or a row does
                not have exactly one value per feature.
        """
        if not self.class_probs:
            raise ValueError("NaiveBayesClassifier must be fitted before calling predict()")

        features = list(self.features)
        predictions = []

        for row in X:
            row = list(row)
            if len(row) != len(features):
                raise ValueError(
                    f"expected {len(features)} values per row, got {len(row)}"
                )

            scores = {}
            for label, prior in self.class_probs.items():
                score = prior
                for feature, value in zip(features, row):
                    score *= self._likelihood(feature, value, label)
                scores[label] = score

            best_label = max(scores, key=scores.get)
            predictions.append((best_label, scores[best_label]))

        return predictions

In [43]:
# Split the frame into the predictor columns and the target column.
target_column = 'PLAY_GOLF'
features = df.columns.drop(target_column).tolist()

# The classifier is fed numpy arrays: one row per observation.
X = df[features].to_numpy()
y = df[target_column].to_numpy()

# The classifier reads the column names from its `features` attribute.
nb = NaiveBayesClassifier()
nb.features = features
nb.fit(X, y)

# Each query lists WEATHER, TEMPERATURE, HUMIDITY, WINDY in training order.
test_data = [
    ['Rainy', 'Cool', 'High', 'True'],
    ['Sunny', 'Mild', 'Normal', 'False'],
    ['Overcast', 'Hot', 'Normal', 'True'],
]
y_pred = nb.predict(test_data)

# One (label, score) tuple per line.
print('\n'.join(map(str, y_pred)))

('No', 0.018221574344023325)
('Yes', 0.036157024793388434)
('Yes', 0.015495867768595043)
