## SPAM MESSAGE

## NON SPAM

In [7]:
import os
import logging
import re
from typing import List
from pydantic import BaseModel, Field
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
from pprint import pprint

# ============================================================
# CONFIGURATION AND SETUP
# ============================================================

# Configure the LLM
# Single shared chat-model client; both tool functions in this file call
# llm.invoke(...) on it. The API key is read from the GOOGLE_API_KEY
# environment variable (os.getenv yields None when it is unset).
# temperature=0 is presumably chosen to keep scoring as repeatable as possible.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0
)

# Setup logging for debugging
# Root logger at INFO so the logging.info/warning/error calls made by the
# graph nodes and tools are emitted.
logging.basicConfig(level=logging.INFO)

# ============================================================
# DATA STRUCTURES
# ============================================================

class State(BaseModel):
    # Shared state object handed from node to node in the email pipeline.
    # sender/subject/body are required; every other field has a default and
    # is filled in by the nodes as the email moves through the graph.

    # --- raw email input -------------------------------------------------
    messages: List[str] = Field(default_factory=list, description="List of messages")
    sender: str = Field(..., description="Email sender")
    subject: str = Field(..., description="Email subject")
    body: str = Field(..., description="Email body")
    attachments: List[str] = Field(
        default_factory=list,
        description="List of attachment filenames",
    )

    # --- sender reputation flags (not written by any node visible here) ---
    is_whitelisted: bool = Field(default=False, description="Is sender whitelisted")
    is_blacklisted: bool = Field(default=False, description="Is sender blacklisted")

    # --- analysis results -------------------------------------------------
    # Written by spam_analysis_node / attachment_scan_node respectively.
    spam_score: float = Field(default=0.0, description="Spam score of the email")
    is_malicious_attachment: bool = Field(
        default=False,
        description="Are attachments malicious",
    )

    # --- outcome ----------------------------------------------------------
    # classification ends up as "quarantine" or "not_spam"; human_decision is
    # only populated when the email is quarantined.
    classification: str = Field(default="", description="Email classification")
    human_decision: str = Field(default="", description="Decision made by human reviewer")

class EmailContent(BaseModel):
    # Argument schema for the analyze_spam_score tool: the two text fields of
    # an email that are sent to the LLM for scoring.
    subject: str
    body: str

class Attachments(BaseModel):
    # Argument schema for the scan_attachments tool: attachment filenames
    # only (no file contents are carried here).
    files: List[str]

# ============================================================
# TOOLS
# ============================================================

@tool(args_schema=EmailContent)
def analyze_spam_score(subject: str, body: str) -> float:
    """
    Analyze the email content to determine its spam score using LLM.
    """
    # NOTE: the docstring above is kept verbatim because LangChain's @tool
    # decorator uses it as the tool description; implementation notes
    # therefore live in comments.
    #
    # Contract: returns the first number found in the model's reply, clamped
    # to [0.0, 1.0]. If the reply contains no number at all, the problem is
    # logged and a fallback score of 0.1 is returned.
    prompt = (
        "Analyze the following email for spam likelihood.\n\n"
        f"Subject: {subject}\nBody: {body}\n\n"
        "Provide a spam score between 0.0 (not spam) and 1.0 (definitely spam)."
    )
    reply_text = llm.invoke(prompt).content.strip()

    found = re.search(r"(\d*\.?\d+)", reply_text)
    if found is None:
        logging.error(f"Invalid response for spam score: {reply_text}")
        return 0.1

    # Clamp so an out-of-range answer (e.g. "9" from "9/10") stays within 0..1.
    raw_score = float(found.group(1))
    capped = min(raw_score, 1.0)
    return max(0.0, capped)

@tool(args_schema=Attachments)
def scan_attachments(files: List[str]) -> bool:
    """
    Scan the provided attachments for malicious content using LLM.
    """
    # NOTE: the docstring above is kept verbatim because LangChain's @tool
    # decorator uses it as the tool description; implementation notes
    # therefore live in comments.
    #
    # Contract: `files` is a list of attachment filenames. Returns True when
    # any filename carries a known-risky extension, or when the LLM answers
    # "yes" to the maliciousness question; False otherwise.

    # Nothing to scan: do not spend an LLM call on an empty list, whose
    # answer would be arbitrary.
    if not files:
        return False

    # Cheap local check first. str.endswith accepts a tuple of suffixes, so
    # one call per filename covers every risky extension.
    risky_suffixes = (".exe", ".zip", ".rar", ".js")
    if any(name.lower().endswith(risky_suffixes) for name in files):
        return True

    prompt = (
        f"Analyze the following attachments for malicious content:\n"
        f"{files}\nRespond with 'Yes' or 'No'."
    )
    response = llm.invoke(prompt)

    # Match "yes" as a whole word. A plain substring test would also fire on
    # words such as "yesterday" or on an echoed filename like "bayes.pdf".
    return re.search(r"\byes\b", response.content.lower()) is not None

# ============================================================
# GRAPH NODES
# ============================================================

def input_node(state: State) -> State:
    """Entry node of the graph: log that processing started and pass the state through unchanged."""
    logging.info("Input node initialized.")
    return state

def spam_analysis_node(state: State) -> State:
    """Run the analyze_spam_score tool on the email's subject and body and store the result in state.spam_score."""
    tool_input = {"subject": state.subject, "body": state.body}
    state.spam_score = analyze_spam_score.run(tool_input)
    return state

def attachment_scan_node(state: State) -> State:
    """Run the scan_attachments tool on the attachment filenames and store the verdict in state.is_malicious_attachment."""
    tool_input = {"files": state.attachments}
    state.is_malicious_attachment = scan_attachments.run(tool_input)
    return state

def human_review_node(state: State) -> State:
    """Decide whether the email must be held for a human reviewer.

    An email is flagged when its spam score exceeds 0.8 or an attachment was
    judged malicious. Flagged emails are classified "quarantine" and get a
    placeholder human_decision; everything else is classified "not_spam".
    """
    needs_review = state.spam_score > 0.8 or state.is_malicious_attachment

    # Common case first: nothing suspicious, so no reviewer is involved.
    if not needs_review:
        state.classification = "not_spam"
        return state

    logging.warning("Spam or malicious attachments detected. Human review required.")
    state.classification = "quarantine"
    state.human_decision = "Awaiting human intervention."
    return state

def classification_node(state: State) -> State:
    """Finalize the classification: keep "quarantine" (and log it), otherwise force "not_spam"."""
    if state.classification != "quarantine":
        state.classification = "not_spam"
        return state

    logging.info("Email has been quarantined for review.")
    return state

# ============================================================
# BUILD AND COMPILE GRAPH
# ============================================================

graph_builder = StateGraph(State)

# Add nodes
# Every node is registered under its function's own name, in pipeline order.
for _node_fn in (
    input_node,
    spam_analysis_node,
    attachment_scan_node,
    human_review_node,
    classification_node,
):
    graph_builder.add_node(_node_fn.__name__, _node_fn)

# Define edges to establish the sequence of execution
# The pipeline is strictly linear, so each stage is wired to the one after it.
_pipeline = (
    START,
    "input_node",
    "spam_analysis_node",
    "attachment_scan_node",
    "human_review_node",
    "classification_node",
    END,
)
for _source, _target in zip(_pipeline, _pipeline[1:]):
    graph_builder.add_edge(_source, _target)

# Compile the graph with an in-memory checkpointer
checkpointer = MemorySaver()
graph = graph_builder.compile(checkpointer=checkpointer)

# ============================================================
# MAIN EXECUTION
# ============================================================

if __name__ == "__main__":
    # Demo run: push one obviously spammy email (with a .zip attachment)
    # through the compiled graph and print each streamed event.

    # Initial email state
    # Only sender/subject/body are required; the remaining State fields keep
    # their defaults until the nodes fill them in.
    state = State(
        sender="unknown@spamsite.com",
        subject="Congratulations! You've won a free iPhone!",
        body=(
            "Dear user,\n\n"
            "You have been selected as a winner in our exclusive giveaway! "
            "Claim your free iPhone now by clicking the link below.\n\n"
            "Hurry, this offer expires soon!\n\n"
            "Best regards,\n"
            "Spammy Promotions Team"
        ),
        attachments=["offer.zip"]
    )

    # Configuration with thread_id
    # The graph was compiled with a checkpointer; thread_id is the key under
    # which that checkpointer stores this run's state.
    config = {"configurable": {"thread_id": "email_processing_thread"}}

    # Run the graph
    # model_dump() turns the pydantic State into a plain dict for the graph
    # input. Each streamed event is a dict keyed by the name of the node that
    # just ran.
    print("\n🚀 Running Email Processing Pipeline...\n")
    for event in graph.stream(state.model_dump(), config=config):
        pprint(event)

    print("\n✅ Processing complete.")


INFO:root:Input node initialized.



🚀 Running Email Processing Pipeline...

{'input_node': {'attachments': ['offer.zip'],
                'body': 'Dear user,\n'
                        '\n'
                        'You have been selected as a winner in our exclusive '
                        'giveaway! Claim your free iPhone now by clicking the '
                        'link below.\n'
                        '\n'
                        'Hurry, this offer expires soon!\n'
                        '\n'
                        'Best regards,\n'
                        'Spammy Promotions Team',
                'classification': '',
                'human_decision': '',
                'is_blacklisted': False,
                'is_malicious_attachment': False,
                'is_whitelisted': False,
                'messages': [],
                'sender': 'unknown@spamsite.com',
                'spam_score': 0.0,
                'subject': "Congratulations! You've won a free iPhone!"}}


INFO:root:Email has been quarantined for review.


{'spam_analysis_node': {'attachments': ['offer.zip'],
                        'body': 'Dear user,\n'
                                '\n'
                                'You have been selected as a winner in our '
                                'exclusive giveaway! Claim your free iPhone '
                                'now by clicking the link below.\n'
                                '\n'
                                'Hurry, this offer expires soon!\n'
                                '\n'
                                'Best regards,\n'
                                'Spammy Promotions Team',
                        'classification': '',
                        'human_decision': '',
                        'is_blacklisted': False,
                        'is_malicious_attachment': False,
                        'is_whitelisted': False,
                        'messages': [],
                        'sender': 'unknown@spamsite.com',
                        'spam_score': 0.95,