<a href="https://colab.research.google.com/github/Aakash-1s/SDC-GENAI/blob/main/Spam_detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import re
from collections import Counter

# A basic dataset of spam and non-spam messages
spam_messages = [
    "Congratulations! You've won a free gift card. Claim now!",
    "You have a limited-time offer to buy cheap viagra!",
    "Hurry up! You are eligible for a $1000 reward!",
    "Limited time offer! Get 90% off on all electronics.",
    "Dear customer, we are offering a 50% discount on your next purchase.",
]

non_spam_messages = [
    "Hey, let's catch up this weekend!",
    "Are you free to meet for coffee tomorrow?",
    "I am working on a project; can you help me with it?",
    "The report looks great; I will send it to the boss.",
    "Don't forget to submit your assignment by tomorrow.",
]

# Combine the spam and non-spam messages into one dataset
all_messages = spam_messages + non_spam_messages
labels = ['spam'] * len(spam_messages) + ['ham'] * len(non_spam_messages)

# Preprocess the text (remove punctuation, make lowercase)
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    return text

# Build a simple word frequency table
def build_frequency_table(messages):
    word_freq = Counter()
    for message in messages:
        words = preprocess(message).split()
        word_freq.update(words)
    return word_freq

# Calculate the probability of a message being spam or non-spam based on word frequency
def calculate_probability(message, word_freq, total_messages):
    words = preprocess(message).split()
    prob = 1
    for word in words:
        prob *= word_freq[word] / total_messages
    return prob

# Spam detection function
def detect_spam(message):
    # Build frequency tables
    spam_freq = build_frequency_table(spam_messages)
    ham_freq = build_frequency_table(non_spam_messages)

    # Calculate probabilities for spam and non-spam
    spam_prob = calculate_probability(message, spam_freq, len(spam_messages))
    ham_prob = calculate_probability(message, ham_freq, len(non_spam_messages))

    # Compare probabilities
    if spam_prob > ham_prob:
        return "Spam"
    else:
        return "Not Spam"

# Test the spam detector
test_messages = [
    "Congratulations! You've won a free iPhone!",
    "Hey, are we still on for dinner tonight?",
    "Get a 100% discount on all your purchases today.",
    "Can you send me the updated report?",
    "You're invited to a special sale event this weekend."
]

for msg in test_messages:
    print(f"Message: {msg}")
    print(f"Classification: {detect_spam(msg)}\n")


Message: Congratulations! You've won a free iPhone!
Classification: Not Spam

Message: Hey, are we still on for dinner tonight?
Classification: Not Spam

Message: Get a 100% discount on all your purchases today.
Classification: Not Spam

Message: Can you send me the updated report?
Classification: Not Spam

Message: You're invited to a special sale event this weekend.
Classification: Not Spam

