In [2]:
from collections import defaultdict

# Training data: (email text, label) pairs used to estimate, for each class,
# the fraction of emails that contain a given word.
emails = [
    ("Buy this online today", "Ham"),
    ("Send us money today", "Spam"),
    ("Send money today", "Spam"),
    ("Buy online today", "Spam"),
    ("Send us money", "Ham"),
    ("Send this money", "Spam"),
]

# Step 1: Count total spam and ham emails
total_spam = sum(1 for _, label in emails if label == "Spam")
total_ham = sum(1 for _, label in emails if label == "Ham")

# Step 2: Count, per label, how many emails contain each word
word_counts = {
    "Spam": defaultdict(int),
    "Ham": defaultdict(int),
}
unique_words = set()

for text, label in emails:
    # A set is used so a word repeated inside one email is counted once:
    # the counts measure "emails containing the word", not total occurrences.
    words = set(text.lower().split())
    unique_words.update(words)
    for word in words:
        word_counts[label][word] += 1

# Step 3: Compute conditional probabilities
#   P(word | label) = (emails with that label containing the word)
#                     / (emails with that label)
# No smoothing is applied, so a word never seen in a class gets probability 0.
conditional_probs = {"Spam": {}, "Ham": {}}
for word in unique_words:
    # Use .get() rather than [] here: indexing a defaultdict inserts the
    # missing key, which would silently add zero-count entries to word_counts
    # for any word that occurs in only one class.
    conditional_probs["Spam"][word] = word_counts["Spam"].get(word, 0) / total_spam
    conditional_probs["Ham"][word] = word_counts["Ham"].get(word, 0) / total_ham

# Display the result as a table, one row per word in alphabetical order
print(f"{'Word':<10}{'P(Word|Spam)':<15}{'P(Word|Ham)'}")
print("=" * 35)
for word in sorted(unique_words):
    print(f"{word:<10}{conditional_probs['Spam'][word]:<15.2f}{conditional_probs['Ham'][word]:.2f}")

# Bare expression: in a notebook this shows the raw counts as the cell result.
word_counts

Word      P(Word|Spam)   P(Word|Ham)
===================================
buy       0.25           0.50
money     0.75           0.50
online    0.25           0.50
send      0.75           0.50
this      0.25           0.50
today     0.75           0.50
us        0.25           0.50


{'Spam': defaultdict(int,
             {'money': 3,
              'us': 1,
              'today': 3,
              'send': 3,
              'buy': 1,
              'online': 1,
              'this': 1}),
 'Ham': defaultdict(int,
             {'buy': 1,
              'today': 1,
              'online': 1,
              'this': 1,
              'money': 1,
              'us': 1,
              'send': 1})}