In [1]:
import math
from collections import defaultdict

# Training data
data = [
    ("win prize now", "Spam"),
    ("claim free reward", "Spam"),
    ("win money free", "Spam"),
    ("prize money win", "Spam"),
    ("free prize claim", "Spam"),
    ("meeting scheduled today", "Ham"),
    ("project meeting tomorrow", "Ham"),
    ("submit assignment today", "Ham"),
    ("discussion on project", "Ham"),
    ("meeting about assignment", "Ham")
]

# Initialize counts
word_counts = {"Spam": defaultdict(int), "Ham": defaultdict(int)}
class_counts = {"Spam": 0, "Ham": 0}
total_words = {"Spam": 0, "Ham": 0}

# Training
for msg, label in data:
    class_counts[label] += 1
    for word in msg.split():
        word_counts[label][word] += 1
        total_words[label] += 1

# Vocabulary
vocab = set(word for msg, _ in data for word in msg.split())
V = len(vocab)

# Prediction with full probability output
def predict_with_probabilities(message):
    results = {}

    print("Vocabulary Size:", V)
    print("\nMessage:", message)
    print("\n--- Probability Calculation ---")

    for label in ["Spam", "Ham"]:
        prior = class_counts[label] / len(data)
        log_prob = math.log(prior)

        print(f"\nClass: {label}")
        print(f"Prior P({label}) = {prior}")

        for word in message.split():
            likelihood = (word_counts[label][word] + 1) / (total_words[label] + V)
            log_prob += math.log(likelihood)
            print(f"P({word} | {label}) = {likelihood}")

        results[label] = math.exp(log_prob)
        print(f"Final P({label} | message) = {results[label]}")

    print("\n--- Final Result ---")
    for cls, prob in results.items():
        print(f"{cls}: {prob}")

    print("\nPredicted Class:", max(results, key=results.get))


# Test message
predict_with_probabilities("win free prize")


Vocabulary Size: 17

Message: win free prize

--- Probability Calculation ---

Class: Spam
Prior P(Spam) = 0.5
P(win | Spam) = 0.125
P(free | Spam) = 0.125
P(prize | Spam) = 0.125
Final P(Spam | message) = 0.0009765625000000009

Class: Ham
Prior P(Ham) = 0.5
P(win | Ham) = 0.03125
P(free | Ham) = 0.03125
P(prize | Ham) = 0.03125
Final P(Ham | message) = 1.5258789062500007e-05

--- Final Result ---
Spam: 0.0009765625000000009
Ham: 1.5258789062500007e-05

Predicted Class: Spam
