# Spam Detection AI Game: Three Levels of AI

This notebook shows three different AI approaches to detect spam emails and plugs them into a simple "Inbox Duel" game where a human player competes with the AI:

1. Classical machine learning with TF‑IDF + Linear SVM
2. Transformer-based model (DistilBERT embeddings + Logistic Regression)
3. Cloud LLM spam oracle (API-based, skeleton code)

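The dataset is deliberately tiny (10 hard-coded emails) so you can run this quickly and extend it in class. With a 30% hold-out only 3 emails land in the test set, so treat the reported metrics as illustrative rather than as a meaningful benchmark.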

In [None]:
# If you run this in a fresh environment (e.g. Google Colab), uncomment the line below and run this cell.
# !pip install pandas scikit-learn transformers torch --quiet

## 1. Mini email dataset and train/test split

In [None]:
import random
import textwrap
from dataclasses import dataclass
from typing import Callable

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Hard-coded mini corpus. The texts are kept in two lists (spam first, then ham)
# and zipped with their label so `data` is a list of (label, text) tuples.
_spam_texts = [
    "Congratulations! You have won a $1000 gift card. Click here to claim now!",
    "WIN a brand new iPhone. Limited time offer, act now!",
    "You have been selected for a lottery prize, reply with your bank details.",
    "Cheap meds online!!! Buy now with 90% discount.",
    "This is not a scam. Send your password to verify your account.",
]
_ham_texts = [
    "Hi John, can we move our meeting to tomorrow morning?",
    "Reminder: your dentist appointment is on Friday at 10am.",
    "Thanks for sending over the report, I'll review it tonight.",
    "Mum’s birthday is next week – shall we book a restaurant?",
    "Here are the notes from today’s lecture, let me know if you have questions.",
]
data = [("spam", body) for body in _spam_texts] + [("ham", body) for body in _ham_texts]

# Tabular view of the corpus plus a numeric target: ham -> 0, spam -> 1.
_label_to_num = {"ham": 0, "spam": 1}
df = pd.DataFrame(data, columns=["label", "text"])
df["label_num"] = df["label"].map(_label_to_num)

# Plain Python lists are what the models below consume.
X = df["text"].to_list()
y = df["label_num"].to_list()

# 70/30 split, stratified on the label and seeded so every run sees the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)

print("Train size:", len(X_train), " Test size:", len(X_test))
# Peek at the first three training examples.
for label, text in zip(y_train[:3], X_train[:3]):
    tag = "spam" if label else "ham"
    print(f"[{tag}] {text}")


## 2. Level 1: Classical model (TF‑IDF + Linear SVM)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
import math

# Level 1 model: TF-IDF features (unigrams + bigrams, English stop words removed,
# terms appearing in more than 90% of documents dropped) fed into a linear SVM.
svm_model = Pipeline([
    ("tfidf", TfidfVectorizer(
        stop_words="english",
        max_df=0.9,
        ngram_range=(1, 2),
    )),
    # Seed the classifier with the same value used for the train/test split so
    # the fitted model (and therefore the game) is reproducible across runs.
    ("clf", LinearSVC(random_state=42)),
])

svm_model.fit(X_train, y_train)

print("SVM performance on test set:")
svm_preds = svm_model.predict(X_test)
print(classification_report(y_test, svm_preds, target_names=["ham", "spam"]))

def svm_predict_label(text: str) -> int:
    """Classify a single email with the fitted SVM pipeline.

    Returns 1 when the pipeline predicts spam and 0 when it predicts ham.
    """
    # The pipeline expects a batch, so wrap the text in a one-element list
    # and take the single prediction back out.
    predictions = svm_model.predict([text])
    first = predictions[0]
    return int(first)

def svm_confidence(text: str) -> float:
    """Return a pseudo-probability in [0, 1] derived from the SVM decision value.

    The signed distance from the decision boundary is squashed with a logistic
    function. This is NOT a calibrated probability. Per scikit-learn's binary
    convention a positive decision value favours the second class (label 1,
    i.e. spam here), so values above 0.5 lean spam and values below 0.5 lean
    ham; the score is spam-oriented rather than "confidence in the predicted
    label".

    Args:
        text: Raw email text.

    Returns:
        ``1 / (1 + exp(-d))`` where ``d`` is the pipeline's decision value.
    """
    dist = float(svm_model.decision_function([text])[0])
    # Numerically stable logistic: never exponentiate a large positive number,
    # which would raise OverflowError for very negative decision values.
    if dist >= 0:
        return 1.0 / (1.0 + math.exp(-dist))
    exp_dist = math.exp(dist)
    return exp_dist / (1.0 + exp_dist)


## 3. Level 2: Transformer-based model (DistilBERT embeddings + Logistic Regression)

This section downloads a pre-trained DistilBERT model, uses it to turn emails into vector embeddings, then trains a simple Logistic Regression classifier on top.

Note: the first run will download model weights and may take a little while.

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.linear_model import LogisticRegression

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Tokenizer and encoder come from the same pre-trained checkpoint; the encoder
# is moved to the selected device so it matches the tensors it will receive.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
bert_model = AutoModel.from_pretrained("distilbert-base-uncased")
bert_model = bert_model.to(device)

def embed_texts(texts, batch_size: int = 8):
    """Return a NumPy array of sentence embeddings, one row per input text.

    Each text is tokenized, passed through the DistilBERT encoder without
    gradient tracking, and its token vectors are mean-pooled. The pooling is
    weighted by the tokenizer's attention mask so that padding tokens (added
    to make every sequence in a batch the same length) do not contribute.
    Without the mask, a text's embedding would depend on which other texts
    happen to share its batch, and batched training embeddings would differ
    from the single-text embeddings computed at prediction time.

    Args:
        texts: Sequence of strings to embed.
        batch_size: Number of texts encoded per forward pass.

    Returns:
        Array of shape ``(len(texts), hidden_size)``; an empty
        ``(0, hidden_size)`` array when ``texts`` is empty.
    """
    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        tokens = tokenizer(
            batch,
            padding=True,
            truncation=True,
            return_tensors="pt",
        ).to(device)
        with torch.no_grad():
            outputs = bert_model(**tokens)

        hidden = outputs.last_hidden_state
        # attention_mask is 1 for real tokens and 0 for padding; add a trailing
        # axis so it broadcasts over the hidden dimension.
        mask = tokens["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        summed = (hidden * mask).sum(dim=1)
        # Guard against division by zero should a row ever be fully masked.
        token_counts = mask.sum(dim=1).clamp(min=1.0)
        all_embeddings.append((summed / token_counts).cpu())

    if not all_embeddings:
        # torch.cat rejects an empty list; return a correctly shaped empty array.
        return torch.empty((0, bert_model.config.hidden_size)).numpy()
    return torch.cat(all_embeddings, dim=0).numpy()

# Turn both splits into fixed-size embedding matrices once, up front.
X_train_emb, X_test_emb = embed_texts(X_train), embed_texts(X_test)

# Lightweight classifier on top of the frozen encoder's embeddings.
# `fit` returns the estimator itself, so construction and training chain.
bert_clf = LogisticRegression(max_iter=1000).fit(X_train_emb, y_train)

print("DistilBERT + LogisticRegression performance on test set:")
bert_preds = bert_clf.predict(X_test_emb)
bert_report = classification_report(
    y_test,
    bert_preds,
    target_names=["ham", "spam"],
)
print(bert_report)

def bert_predict_label(text: str) -> int:
    """Classify one email with the DistilBERT + Logistic Regression model.

    Returns 1 when the classifier predicts spam and 0 when it predicts ham.
    """
    # Embed the text as a one-row batch and read back the single prediction.
    prediction = bert_clf.predict(embed_texts([text]))
    return int(prediction[0])

def bert_confidence(text: str) -> float:
    """Return the classifier's estimated probability that *text* is spam."""
    probabilities = bert_clf.predict_proba(embed_texts([text]))
    # Row 0 is the only input; column 1 is the probability of spam.
    first_row = probabilities[0]
    return float(first_row[1])


## 4. Level 3: Cloud LLM spam oracle (skeleton)

In a production system, you can also ask a cloud-hosted LLM to classify each email as spam or ham.

Below is a **skeleton** for such an integration. You need to plug in your own API call to your chosen provider (e.g. OpenAI, Anthropic, etc.) and parse the response to return `1` for spam and `0` for ham.

In [None]:
def llm_predict_label(text: str) -> int:
    r"""Stub for a cloud-LLM spam classifier; not implemented in this notebook.

    Once implemented, it should return 1 for spam and 0 for ham so that it can
    be used interchangeably with the other ``*_predict_label`` functions.

    Sketch of one possible implementation (pseudocode; adapt it to your
    provider's SDK):

        response = client.chat.completions.create(
            model="your-llm-model",
            messages=[
                {"role": "system", "content": "You are a spam filter."},
                {"role": "user", "content": f"Is this email spam or ham? Only answer 'spam' or 'ham'.\n\n{text}"},
            ],
        )
        label_str = response.choices[0].message.content.strip().lower()
        return 1 if label_str == "spam" else 0

    Raises:
        NotImplementedError: always. The stub is left unimplemented for safety
            and to keep the notebook self-contained; supply your own API call
            if you have a provider and an API key.
    """
    reason = "Plug in your cloud LLM spam classifier here."
    raise NotImplementedError(reason)

def llm_confidence(text: str) -> float:
    """Stub for the confidence score of the LLM-based classifier.

    One way to implement it: ask the model to report a probability or
    confidence value together with its label, then parse that number out of
    the response here.

    Raises:
        NotImplementedError: always, until you provide an implementation.
    """
    reason = "Return a confidence score from the LLM response."
    raise NotImplementedError(reason)


## 5. Shared game engine: Inbox Duel (human vs AI)

In [None]:
@dataclass
class AIOpponent:
    """Bundle of everything the Inbox Duel game needs to know about one AI player.

    ``play_inbox_duel`` reads all three fields: it prints ``name``, calls
    ``predict_label`` to get the AI's answer for an email, and calls
    ``confidence`` to get the score shown next to that answer.
    """
    # Display name used in the game's banner, per-round output and final verdict.
    name: str
    # Maps an email's text to a label; the game treats 1 as spam and anything else as ham.
    predict_label: Callable[[str], int]
    # Maps an email's text to a float that the game prints with two decimals.
    confidence: Callable[[str], float]

def play_inbox_duel(ai: AIOpponent, test_texts=None, test_labels=None):
    """Run an interactive terminal round of "Inbox Duel": human vs. AI.

    Emails are shown one at a time in random order. The player labels each as
    spam or ham, the AI labels the same email, and each side earns one point
    per correct answer. Entering 'q' ends the game early.

    Args:
        ai: Opponent supplying a display name, a label predictor and a
            confidence function. The confidence value is printed as returned.
        test_texts: Email texts to play with.
        test_labels: True labels for ``test_texts`` (1 is spam, anything else
            is shown as ham). If either argument is None, the notebook's
            ``X_test`` / ``y_test`` are used for both.

    Returns:
        None. All results are printed.
    """
    fall_back = test_texts is None or test_labels is None
    texts, labels = (X_test, y_test) if fall_back else (test_texts, test_labels)

    rounds = list(zip(texts, labels))
    random.shuffle(rounds)

    width = 80
    rule = "-" * width
    prompt = "Is this spam or not? (s = spam, h = ham, q = quit): "

    def label_name(label):
        # Only the value 1 counts as spam; every other value is displayed as ham.
        return "spam" if label == 1 else "ham"

    def read_choice():
        # Keep prompting until the player enters one of the accepted keys.
        choice = input(prompt).strip().lower()
        while choice not in ("s", "h", "q"):
            print("Please type 's', 'h', or 'q'.")
            choice = input(prompt).strip().lower()
        return choice

    human_points = 0
    ai_points = 0

    print(f"\n=== Inbox Duel: You vs {ai.name} ===")
    print("Type 's' for spam, 'h' for ham, 'q' to quit.\n")

    for email_text, true_label in rounds:
        # Show the email, wrapped to the display width, between divider rules.
        print("\n" + rule)
        print("New email received:")
        print(rule)
        for wrapped_line in textwrap.wrap(email_text, width=width):
            print(wrapped_line)
        print(rule)

        choice = read_choice()
        if choice == "q":
            break

        # The AI is only consulted after the player has committed to an answer.
        human_label = int(choice == "s")
        ai_label = ai.predict_label(email_text)
        ai_conf = ai.confidence(email_text)

        truth_text = label_name(true_label).upper()
        human_text = label_name(human_label).upper()
        ai_text = label_name(ai_label).upper()

        print(f"\nGround truth: {truth_text}")
        print(f"Your answer:  {human_text}")
        print(f"{ai.name} answer: {ai_text} (confidence = {ai_conf:.2f})")

        if human_label == true_label:
            human_points += 1
            print("You: +1 point")
        if ai_label == true_label:
            ai_points += 1
            print(f"{ai.name}: +1 point")

        print(f"Scores -> You: {human_points} | {ai.name}: {ai_points}\n")

    print("\nGame over!")
    print(f"Final scores -> You: {human_points} | {ai.name}: {ai_points}")
    if human_points == ai_points:
        verdict = "It's a draw."
    elif human_points > ai_points:
        verdict = "You beat the AI!"
    else:
        verdict = f"{ai.name} wins this time."
    print(verdict)


## 6. Choose your AI opponent

In [None]:
# One AIOpponent per level. Arguments are passed positionally in field order:
# display name, label predictor, confidence function.

# Level 1: classical TF-IDF + linear SVM.
svm_ai = AIOpponent("SVM spam detector", svm_predict_label, svm_confidence)

# Level 2: DistilBERT embeddings + Logistic Regression.
bert_ai = AIOpponent("DistilBERT spam detector", bert_predict_label, bert_confidence)

# Level 3: cloud LLM. Both callables raise NotImplementedError until you
# implement llm_predict_label / llm_confidence.
llm_ai = AIOpponent("Cloud LLM spam oracle", llm_predict_label, llm_confidence)

print("Defined opponents: svm_ai, bert_ai, llm_ai")


In [None]:
# Example: play against the classical SVM model
# Uncomment one line at a time to try different opponents.

# play_inbox_duel(svm_ai)
# play_inbox_duel(bert_ai)
# play_inbox_duel(llm_ai)  # once LLM functions are implemented
