In [2]:
import numpy as np
import pandas as pd

In [3]:
# Toy spam dataset, written one message per row:
# (contains "Buy" flag, contains "Win" flag, label)
records = [
    (1, 1, 'Spam'),
    (1, 1, 'Spam'),
    (1, 0, 'Spam'),
    (0, 1, 'Not Spam'),
    (0, 1, 'Not Spam'),
    (0, 0, 'Not Spam'),
    (1, 0, 'Spam'),
    (0, 0, 'Not Spam'),
]
column_names = ('Contains Buy', 'Contains Win', 'Class')

# Transpose the rows into the column-oriented dict that DataFrame expects.
data = {
    name: list(column)
    for name, column in zip(column_names, zip(*records))
}

df = pd.DataFrame(data)
print(df)

   Contains Buy  Contains Win     Class
0             1             1      Spam
1             1             1      Spam
2             1             0      Spam
3             0             1  Not Spam
4             0             1  Not Spam
5             0             0  Not Spam
6             1             0      Spam
7             0             0  Not Spam


In [4]:
# Prior P(class): the share of training rows that carry each label
total_count = df.shape[0]
class_counts = df['Class'].value_counts()
prior_probs = class_counts.div(total_count)
print("Prior Probabilities:\n", prior_probs)


Prior Probabilities:
 Class
Spam        0.5
Not Spam    0.5
Name: count, dtype: float64


In [5]:
# Model dimensions: the indicator columns used as features, and the
# distinct labels found in the 'Class' column
features = ['Contains Buy', 'Contains Win']
n_features = len(features)

classes = df['Class'].unique()
n_classes = len(classes)

In [6]:
# Empty lookup table with one independent dict per class label; it is later
# filled as likelihoods[class][feature][value] -> probability
likelihoods = dict((label, {}) for label in classes)

In [8]:
# Estimate P(feature = value | class) with Laplace (add-alpha) smoothing:
#
#     (count(value, class) + alpha) / (count(class) + alpha * K)
#
# K is the number of distinct values the feature takes in the data. It is
# derived from the data rather than hard-coded to 2, so the estimates of a
# feature still sum to 1 within each class when a feature is not binary.
alpha = 1  # smoothing strength; 1 is classic Laplace smoothing

# The set of distinct values per feature does not depend on the class,
# so look it up once instead of once per class.
feature_values = {feature: df[feature].unique() for feature in features}

for cls in classes:
    cls_data = df[df['Class'] == cls]
    total_cls_count = len(cls_data)

    for feature in features:
        feature_vals = cls_data[feature].value_counts()
        values = feature_values[feature]
        smoothed_total = total_cls_count + alpha * len(values)
        likelihoods[cls][feature] = {
            # .get(val, 0): a value never observed with this class counts as 0
            val: (feature_vals.get(val, 0) + alpha) / smoothed_total
            for val in values
        }

print("Likelihoods:\n", likelihoods)

Likelihoods:
 {'Spam': {'Contains Buy': {np.int64(1): np.float64(0.8333333333333334), np.int64(0): 0.16666666666666666}, 'Contains Win': {np.int64(1): np.float64(0.5), np.int64(0): np.float64(0.5)}}, 'Not Spam': {'Contains Buy': {np.int64(1): 0.16666666666666666, np.int64(0): np.float64(0.8333333333333334)}, 'Contains Win': {np.int64(1): np.float64(0.5), np.int64(0): np.float64(0.5)}}}


In [9]:
def predict(sample):
    """Classify one sample with the fitted Naive Bayes tables.

    Reads the notebook-level ``classes``, ``features``, ``prior_probs``,
    ``likelihoods`` and ``class_counts``.

    Parameters
    ----------
    sample : dict
        Maps every name in ``features`` to the value observed for it.

    Returns
    -------
    The class label with the largest unnormalised posterior, i.e.
    ``prior * product of the per-feature likelihoods``. On a tie the class
    that comes first in ``classes`` wins.

    Raises
    ------
    KeyError
        If ``sample`` lacks one of the names in ``features``.
    """
    posterior_probs = {}

    for cls in classes:
        prior = prior_probs[cls]
        likelihood = 1
        for feature in features:
            feature_val = sample[feature]
            value_probs = likelihoods[cls][feature]
            # A value that never occurred in training gets the zero-count
            # smoothed estimate for THIS class, 1 / (class size + number of
            # known values), matching the zero-count entries of the table.
            # The previous fallback divided by the size of the whole dataset,
            # which is inconsistent with how the table was smoothed.
            unseen_prob = 1 / (class_counts[cls] + len(value_probs))
            likelihood *= value_probs.get(feature_val, unseen_prob)

        posterior_probs[cls] = prior * likelihood

    # Return the class with the highest posterior probability
    return max(posterior_probs, key=posterior_probs.get)

# Test the classifier on one hand-built sample with 'Contains Buy' set and
# 'Contains Win' unset. The sample is a dict keyed by the feature column names,
# which is the shape predict() indexes into.
test_sample = {'Contains Buy': 1, 'Contains Win': 0}
predicted_class = predict(test_sample)
print(f"Predicted Class for {test_sample}: {predicted_class}")


Predicted Class for {'Contains Buy': 1, 'Contains Win': 0}: Spam
