In [95]:
from pprint import pprint
from langgraph.graph import StateGraph, END
from langgraph.errors import NodeInterrupt
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.tools import tool
from langchain_core.callbacks.manager import CallbackManager
from langchain_core.callbacks.base import BaseCallbackHandler
import logging
from typing import TypedDict, List
from pydantic import BaseModel, ValidationError, Field
from langchain_google_genai import ChatGoogleGenerativeAI

# Configure the LLM
# `os` is imported here because this cell reads GOOGLE_API_KEY through
# os.getenv but its import list never brings `os` into scope; without this
# the cell raises NameError when run in a fresh kernel.
import os

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    api_key=os.getenv("GOOGLE_API_KEY"),  # None when the variable is unset
    temperature=0
)

# Set up checkpointing
# (created here but not referenced by the rest of the code in this cell)
checkpointer = MemorySaver()

# Setup logging for debugging
# INFO level is what makes the "Tool started/ended" callback lines visible.
logging.basicConfig(level=logging.INFO)

# Define a callback handler for debugging
class SimpleCallbackHandler(BaseCallbackHandler):
    """Debugging callback handler that reports tool events via the root logger."""

    def on_tool_start(self, serialized: dict, input_str: str, **kwargs):
        """Log the input a tool run starts with; ``serialized`` is not used."""
        message = f"Tool started with input: {input_str}"
        logging.log(logging.INFO, message)

    def on_tool_end(self, output: str, **kwargs):
        """Log the value a tool run finished with."""
        message = f"Tool ended with output: {output}"
        logging.log(logging.INFO, message)

    def on_tool_error(self, error: Exception, **kwargs):
        """Log the exception raised by a tool run at ERROR level."""
        message = f"Tool error: {error}"
        logging.log(logging.ERROR, message)

# Configure the CallbackManager
# One handler instance, wrapped in one manager, is shared by every
# tool/LLM invocation below through config={"callbacks": callback_manager}.
callback_handler = SimpleCallbackHandler()
callback_manager = CallbackManager(handlers=[callback_handler])

# Define the email state schema
class EmailState(TypedDict):
    """State dictionary passed through process_email and evaluate_email."""

    messages: List[str]  # initialised to [] by the driver; not read by the code shown here
    sender: str  # address compared against the whitelist/blacklist in process_email
    subject: str  # passed to the analyze_spam_score tool
    body: str  # passed to the analyze_spam_score tool
    attachments: List[str]  # attachment filenames passed to the scan_attachments tool
    is_whitelisted: bool  # written by process_email
    is_blacklisted: bool  # written by process_email
    spam_score: float  # written by process_email; evaluate_email compares it to 0.8
    content_category: str  # initialised to ""; not written or read by the code shown here
    is_malicious_attachment: bool  # written by process_email from scan_attachments
    is_quarantined: bool  # initialised to False; not written or read by the code shown here
    human_decision: str  # initialised to ""; not written or read by the code shown here
    classification: str  # set by evaluate_email: "spam", "not_spam" or "quarantine"

# Define input models
# Argument schema for the analyze_spam_score tool (see its @tool decorator).
# Both fields are required: `...` as the Field default marks "no default".
# Documented with comments rather than a class docstring so the schema
# pydantic generates from this model is left exactly as it was.
class EmailContent(BaseModel):
    subject: str = Field(..., description="Subject of the email")
    body: str = Field(..., description="Body of the email")

# Argument schema for the scan_attachments tool (see its @tool decorator).
# `files` is required and holds attachment filenames only, not file contents.
class Attachments(BaseModel):
    files: List[str] = Field(..., description="List of attachment filenames")

# Tools
@tool(args_schema=EmailContent)
def analyze_spam_score(subject: str, body: str) -> float:
    """Analyze the email content to determine its spam score."""
    # NOTE: the docstring above is kept verbatim on purpose -- the @tool
    # decorator uses it as the tool description.
    #
    # Returns a score in [0.0, 1.0]:
    #   * 0.9 when the subject or body contains one of the spam keywords,
    #   * otherwise the first number found in the LLM reply, clamped to 0..1,
    #   * 0.1 when the reply contains no number at all.
    import re

    spam_keywords = [
        "win", "winner", "selected", "claim your prize",
        "urgent", "congratulations", "free"
    ]
    # Match whole words/phrases, case-insensitively. A plain substring test
    # flags harmless text that merely contains a keyword ("knowing" and
    # "drawing" contain "win", "freedom" contains "free"). Inflected forms
    # that no longer match here still reach the LLM check below.
    keyword_re = re.compile(
        r"\b(?:" + "|".join(map(re.escape, spam_keywords)) + r")\b",
        re.IGNORECASE,
    )
    if keyword_re.search(subject) or keyword_re.search(body):
        return 0.9  # High spam score for flagged keywords

    # Updated prompt with explicit instructions
    # NOTE(review): subject and body are untrusted text interpolated straight
    # into the prompt, so an email can try to instruct the model to answer
    # "0.0". Treat the returned score as advisory.
    prompt = (
        f"Analyze the following email for spam likelihood.\n"
        f"Subject: {subject}\n"
        f"Body: {body}\n\n"
        "Please provide a spam score between 0.0 (not spam) and 1.0 (definitely spam). "
        "Respond with only the numeric score."
    )
    response = llm.invoke(prompt, config={"callbacks": callback_manager})
    reply = response.content.strip()

    # Log the raw response for debugging
    logging.info(f"LLM Response for Spam Score: {reply}")

    # Take the first number in the reply; the pattern accepts "1", "0.5"
    # and ".5", all of which float() parses, so no ValueError can occur.
    match = re.search(r"(\d*\.?\d+)", reply)
    if match is None:
        logging.error(
            f"Invalid spam score received: {reply} | "
            "Error: No numeric value found in LLM response."
        )
        # NOTE(review): this fallback fails open (treats an unparseable
        # reply as probably-not-spam); confirm that is the intended policy.
        return 0.1
    return max(0.0, min(float(match.group(1)), 1.0))  # Clamp to 0..1



@tool(args_schema=Attachments)
def scan_attachments(files: List[str]) -> bool:
    """Scan the provided attachments for malicious content."""
    # NOTE: the docstring above is kept verbatim on purpose -- the @tool
    # decorator uses it as the tool description.
    #
    # Returns True when any filename carries a blocked extension or the LLM
    # answers "yes"; False otherwise, including when `files` is empty.
    import re

    # Nothing attached: there is nothing to flag, so skip the LLM round-trip.
    if not files:
        return False

    malicious_extensions = (".exe", ".zip", ".rar", ".js")
    # Trailing dots/spaces are dropped before the suffix test so names such
    # as "setup.exe." or "setup.exe " are still caught.
    if any(
        name.lower().rstrip(". ").endswith(malicious_extensions)
        for name in files
    ):
        logging.info(f"Malicious file detected in attachments: {files}")
        return True

    # NOTE(review): the model only ever sees the filenames, never the file
    # contents, so this is a name-based heuristic rather than a real scan.
    prompt = (
        f"Analyze the following attachments for malicious content:\n{files}\n\n"
        "Please answer 'Yes' if any attachment is malicious or 'No' if they are safe. "
        "Respond with only 'Yes' or 'No'."
    )
    response = llm.invoke(prompt, config={"callbacks": callback_manager})

    # Log the raw response for debugging
    logging.info(f"LLM Response for Attachment Scan: {response.content.strip()}")

    # Look for "yes" as a whole word so that words merely containing those
    # letters (e.g. "eyes", "yesterday") do not count as a positive answer.
    return re.search(r"\byes\b", response.content, re.IGNORECASE) is not None



# Node functions
def process_email(state: EmailState) -> EmailState:
    """Fill in the reputation and analysis fields of ``state``.

    Sets ``is_whitelisted`` / ``is_blacklisted`` from the sender address, then
    stores the results of the ``analyze_spam_score`` and ``scan_attachments``
    tools in ``spam_score`` and ``is_malicious_attachment``.

    Args:
        state: Email state; must contain ``sender``, ``subject``, ``body``
            and ``attachments``. It is modified in place.

    Returns:
        The same ``state`` object, updated.
    """
    # Example whitelist and blacklist
    whitelist = {"colleague@company.com", "boss@company.com"}
    blacklist = {"unknown@spamsite.com", "phishing@malicious.com"}

    # Compare addresses case-insensitively and ignore surrounding whitespace,
    # so "Unknown@SpamSite.com" cannot slip past the blacklist.
    sender = (state['sender'] or "").strip().lower()

    # Write BOTH flags on every call. Setting only the matching one would let
    # a stale value already present on the incoming state survive (e.g. a
    # whitelisted sender keeping is_blacklisted=True from an earlier pass).
    state['is_whitelisted'] = sender in whitelist
    state['is_blacklisted'] = sender in blacklist

    try:
        state['spam_score'] = analyze_spam_score.invoke(
            input={'subject': state['subject'], 'body': state['body']},
            config={"callbacks": callback_manager}
        )
    except ValidationError as e:
        logging.error(f"Validation Error: {e}")
        # NOTE(review): evaluate_email tests `spam_score > 0.8`, so this
        # default alone does not classify the email as spam.
        state['spam_score'] = 0.8  # Assign a higher default spam score for safety

    try:
        state['is_malicious_attachment'] = scan_attachments.invoke(
            input={'files': state['attachments']},
            config={"callbacks": callback_manager}
        )
    except ValidationError as e:
        logging.error(f"Validation Error in attachment scanning: {e}")
        # NOTE(review): fails open -- attachments that could not be validated
        # are treated as safe; confirm this is intended.
        state['is_malicious_attachment'] = False

    return state

def evaluate_email(state: EmailState) -> EmailState:
    """Set ``state["classification"]`` and return the same state object.

    Negative signals take precedence over the whitelist: a blacklisted
    sender, a spam score strictly above 0.8, or a malicious attachment yields
    "spam". Otherwise a whitelisted sender yields "not_spam", and everything
    else yields "quarantine".
    """
    flagged = (
        state["is_blacklisted"]
        or state["spam_score"] > 0.8
        or state["is_malicious_attachment"]
    )
    if flagged:
        verdict = "spam"
    else:
        verdict = "not_spam" if state["is_whitelisted"] else "quarantine"
    state["classification"] = verdict
    return state

# Execution logic
if __name__ == "__main__":
    # Demo input: a plain message from an address on the example whitelist,
    # carrying a single PDF attachment.
    sender = "colleague@company.com"
    subject = "Meeting Agenda for Tomorrow"
    attachments = ["agenda.pdf"]
    body = (
        "Hi team, please find attached the agenda "
        "for tomorrow's meeting. Let me know if you "
        "have any questions."
    )

    # Every analysis field starts at its neutral value; process_email and
    # evaluate_email fill them in below.
    initial_state = EmailState(
        messages=[],
        sender=sender, subject=subject, body=body, attachments=attachments,
        is_whitelisted=False, is_blacklisted=False,
        spam_score=0.0, content_category="",
        is_malicious_attachment=False, is_quarantined=False,
        human_decision="", classification="",
    )

    pprint(initial_state)
    print("\nProcessing...\n")

    # process_email mutates and returns the same dict, so the two steps chain.
    state = evaluate_email(process_email(initial_state))

    print("\nFinal Processed State:")
    pprint(state)


INFO:root:Tool started with input: {'subject': 'Meeting Agenda for Tomorrow', 'body': "Hi team, please find attached the agenda for tomorrow's meeting. Let me know if you have any questions."}


{'attachments': ['agenda.pdf'],
 'body': "Hi team, please find attached the agenda for tomorrow's meeting. Let "
         'me know if you have any questions.',
 'classification': '',
 'content_category': '',
 'human_decision': '',
 'is_blacklisted': False,
 'is_malicious_attachment': False,
 'is_quarantined': False,
 'is_whitelisted': False,
 'messages': [],
 'sender': 'colleague@company.com',
 'spam_score': 0.0,
 'subject': 'Meeting Agenda for Tomorrow'}

Processing...



INFO:root:LLM Response for Spam Score: 0.0
INFO:root:Tool ended with output: 0.0
INFO:root:Tool started with input: {'files': ['agenda.pdf']}
INFO:root:LLM Response for Attachment Scan: No
INFO:root:Tool ended with output: False



Final Processed State:
{'attachments': ['agenda.pdf'],
 'body': "Hi team, please find attached the agenda for tomorrow's meeting. Let "
         'me know if you have any questions.',
 'classification': 'not_spam',
 'content_category': '',
 'human_decision': '',
 'is_blacklisted': False,
 'is_malicious_attachment': False,
 'is_quarantined': False,
 'is_whitelisted': True,
 'messages': [],
 'sender': 'colleague@company.com',
 'spam_score': 0.0,
 'subject': 'Meeting Agenda for Tomorrow'}


In [97]:
from pprint import pprint
from langgraph.graph import StateGraph, END
from langgraph.errors import NodeInterrupt
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.tools import tool
from langchain_core.callbacks.manager import CallbackManager
from langchain_core.callbacks.base import BaseCallbackHandler
import logging
from typing import TypedDict, List
from pydantic import BaseModel, ValidationError, Field
from langchain_google_genai import ChatGoogleGenerativeAI

# Configure the LLM
# `os` is imported here because this cell reads GOOGLE_API_KEY through
# os.getenv but its import list never brings `os` into scope; without this
# the cell raises NameError when run in a fresh kernel.
import os

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    api_key=os.getenv("GOOGLE_API_KEY"),  # None when the variable is unset
    temperature=0
)

# Set up checkpointing
# (created here but not referenced by the rest of the code in this cell)
checkpointer = MemorySaver()

# Setup logging for debugging
# INFO level is what makes the "Tool started/ended" callback lines visible.
logging.basicConfig(level=logging.INFO)

# Define a callback handler for debugging
class SimpleCallbackHandler(BaseCallbackHandler):
    """Debugging callback handler that reports tool events via the root logger."""

    def on_tool_start(self, serialized: dict, input_str: str, **kwargs):
        """Log the input a tool run starts with; ``serialized`` is not used."""
        message = f"Tool started with input: {input_str}"
        logging.log(logging.INFO, message)

    def on_tool_end(self, output: str, **kwargs):
        """Log the value a tool run finished with."""
        message = f"Tool ended with output: {output}"
        logging.log(logging.INFO, message)

    def on_tool_error(self, error: Exception, **kwargs):
        """Log the exception raised by a tool run at ERROR level."""
        message = f"Tool error: {error}"
        logging.log(logging.ERROR, message)

# Configure the CallbackManager
# One handler instance, wrapped in one manager, is shared by every
# tool/LLM invocation below through config={"callbacks": callback_manager}.
callback_handler = SimpleCallbackHandler()
callback_manager = CallbackManager(handlers=[callback_handler])

# Define the email state schema
class EmailState(TypedDict):
    """State dictionary passed through process_email and evaluate_email."""

    messages: List[str]  # initialised to [] by the driver; not read by the code shown here
    sender: str  # address compared against the whitelist/blacklist in process_email
    subject: str  # passed to the analyze_spam_score tool
    body: str  # passed to the analyze_spam_score tool
    attachments: List[str]  # attachment filenames passed to the scan_attachments tool
    is_whitelisted: bool  # written by process_email
    is_blacklisted: bool  # written by process_email
    spam_score: float  # written by process_email; evaluate_email compares it to 0.8
    content_category: str  # initialised to ""; not written or read by the code shown here
    is_malicious_attachment: bool  # written by process_email from scan_attachments
    is_quarantined: bool  # initialised to False; not written or read by the code shown here
    human_decision: str  # initialised to ""; not written or read by the code shown here
    classification: str  # set by evaluate_email: "spam", "not_spam" or "quarantine"

# Define input models
# Argument schema for the analyze_spam_score tool (see its @tool decorator).
# Both fields are required: `...` as the Field default marks "no default".
# Documented with comments rather than a class docstring so the schema
# pydantic generates from this model is left exactly as it was.
class EmailContent(BaseModel):
    subject: str = Field(..., description="Subject of the email")
    body: str = Field(..., description="Body of the email")

# Argument schema for the scan_attachments tool (see its @tool decorator).
# `files` is required and holds attachment filenames only, not file contents.
class Attachments(BaseModel):
    files: List[str] = Field(..., description="List of attachment filenames")

# Tools
@tool(args_schema=EmailContent)
def analyze_spam_score(subject: str, body: str) -> float:
    """Analyze the email content to determine its spam score."""
    # NOTE: the docstring above is kept verbatim on purpose -- the @tool
    # decorator uses it as the tool description.
    #
    # Returns a score in [0.0, 1.0]:
    #   * 0.9 when the subject or body contains one of the spam keywords,
    #   * otherwise the first number found in the LLM reply, clamped to 0..1,
    #   * 0.1 when the reply contains no number at all.
    import re

    spam_keywords = [
        "win", "winner", "selected", "claim your prize",
        "urgent", "congratulations", "free"
    ]
    # Match whole words/phrases, case-insensitively. A plain substring test
    # flags harmless text that merely contains a keyword ("knowing" and
    # "drawing" contain "win", "freedom" contains "free"). Inflected forms
    # that no longer match here still reach the LLM check below.
    keyword_re = re.compile(
        r"\b(?:" + "|".join(map(re.escape, spam_keywords)) + r")\b",
        re.IGNORECASE,
    )
    if keyword_re.search(subject) or keyword_re.search(body):
        return 0.9  # High spam score for flagged keywords

    # Updated prompt with explicit instructions
    # NOTE(review): subject and body are untrusted text interpolated straight
    # into the prompt, so an email can try to instruct the model to answer
    # "0.0". Treat the returned score as advisory.
    prompt = (
        f"Analyze the following email for spam likelihood.\n"
        f"Subject: {subject}\n"
        f"Body: {body}\n\n"
        "Please provide a spam score between 0.0 (not spam) and 1.0 (definitely spam). "
        "Respond with only the numeric score."
    )
    response = llm.invoke(prompt, config={"callbacks": callback_manager})
    reply = response.content.strip()

    # Log the raw response for debugging
    logging.info(f"LLM Response for Spam Score: {reply}")

    # Take the first number in the reply; the pattern accepts "1", "0.5"
    # and ".5", all of which float() parses, so no ValueError can occur.
    match = re.search(r"(\d*\.?\d+)", reply)
    if match is None:
        logging.error(
            f"Invalid spam score received: {reply} | "
            "Error: No numeric value found in LLM response."
        )
        # NOTE(review): this fallback fails open (treats an unparseable
        # reply as probably-not-spam); confirm that is the intended policy.
        return 0.1
    return max(0.0, min(float(match.group(1)), 1.0))  # Clamp to 0..1



@tool(args_schema=Attachments)
def scan_attachments(files: List[str]) -> bool:
    """Scan the provided attachments for malicious content."""
    # NOTE: the docstring above is kept verbatim on purpose -- the @tool
    # decorator uses it as the tool description.
    #
    # Returns True when any filename carries a blocked extension or the LLM
    # answers "yes"; False otherwise, including when `files` is empty.
    import re

    # Nothing attached: there is nothing to flag, so skip the LLM round-trip.
    if not files:
        return False

    malicious_extensions = (".exe", ".zip", ".rar", ".js")
    # Trailing dots/spaces are dropped before the suffix test so names such
    # as "setup.exe." or "setup.exe " are still caught.
    if any(
        name.lower().rstrip(". ").endswith(malicious_extensions)
        for name in files
    ):
        logging.info(f"Malicious file detected in attachments: {files}")
        return True

    # NOTE(review): the model only ever sees the filenames, never the file
    # contents, so this is a name-based heuristic rather than a real scan.
    prompt = (
        f"Analyze the following attachments for malicious content:\n{files}\n\n"
        "Please answer 'Yes' if any attachment is malicious or 'No' if they are safe. "
        "Respond with only 'Yes' or 'No'."
    )
    response = llm.invoke(prompt, config={"callbacks": callback_manager})

    # Log the raw response for debugging
    logging.info(f"LLM Response for Attachment Scan: {response.content.strip()}")

    # Look for "yes" as a whole word so that words merely containing those
    # letters (e.g. "eyes", "yesterday") do not count as a positive answer.
    return re.search(r"\byes\b", response.content, re.IGNORECASE) is not None



# Node functions
def process_email(state: EmailState) -> EmailState:
    """Fill in the reputation and analysis fields of ``state``.

    Sets ``is_whitelisted`` / ``is_blacklisted`` from the sender address, then
    stores the results of the ``analyze_spam_score`` and ``scan_attachments``
    tools in ``spam_score`` and ``is_malicious_attachment``.

    Args:
        state: Email state; must contain ``sender``, ``subject``, ``body``
            and ``attachments``. It is modified in place.

    Returns:
        The same ``state`` object, updated.
    """
    # Example whitelist and blacklist
    whitelist = {"colleague@company.com", "boss@company.com"}
    blacklist = {"unknown@spamsite.com", "phishing@malicious.com"}

    # Compare addresses case-insensitively and ignore surrounding whitespace,
    # so "Unknown@SpamSite.com" cannot slip past the blacklist.
    sender = (state['sender'] or "").strip().lower()

    # Write BOTH flags on every call. Setting only the matching one would let
    # a stale value already present on the incoming state survive (e.g. a
    # whitelisted sender keeping is_blacklisted=True from an earlier pass).
    state['is_whitelisted'] = sender in whitelist
    state['is_blacklisted'] = sender in blacklist

    try:
        state['spam_score'] = analyze_spam_score.invoke(
            input={'subject': state['subject'], 'body': state['body']},
            config={"callbacks": callback_manager}
        )
    except ValidationError as e:
        logging.error(f"Validation Error: {e}")
        # NOTE(review): evaluate_email tests `spam_score > 0.8`, so this
        # default alone does not classify the email as spam.
        state['spam_score'] = 0.8  # Assign a higher default spam score for safety

    try:
        state['is_malicious_attachment'] = scan_attachments.invoke(
            input={'files': state['attachments']},
            config={"callbacks": callback_manager}
        )
    except ValidationError as e:
        logging.error(f"Validation Error in attachment scanning: {e}")
        # NOTE(review): fails open -- attachments that could not be validated
        # are treated as safe; confirm this is intended.
        state['is_malicious_attachment'] = False

    return state

def evaluate_email(state: EmailState) -> EmailState:
    """Set ``state["classification"]`` and return the same state object.

    Negative signals take precedence over the whitelist: a blacklisted
    sender, a spam score strictly above 0.8, or a malicious attachment yields
    "spam". Otherwise a whitelisted sender yields "not_spam", and everything
    else yields "quarantine".
    """
    flagged = (
        state["is_blacklisted"]
        or state["spam_score"] > 0.8
        or state["is_malicious_attachment"]
    )
    if flagged:
        verdict = "spam"
    else:
        verdict = "not_spam" if state["is_whitelisted"] else "quarantine"
    state["classification"] = verdict
    return state

# Execution logic
if __name__ == "__main__":
    # Demo input: a prize-giveaway message from an address on the example
    # blacklist, carrying a .zip attachment.
    sender = "unknown@spamsite.com"
    subject = "Congratulations! You've won a free iPhone!"
    attachments = ["offer.zip"]
    body = (
        "Dear user,\n\nYou have been selected as a winner "
        "in our exclusive giveaway! Claim your free iPhone now "
        "by clicking the link below.\n\nHurry, this offer expires soon!"
        "\n\nBest regards,\nSpammy Promotions Team"
    )

    # Every analysis field starts at its neutral value; process_email and
    # evaluate_email fill them in below.
    initial_state = EmailState(
        messages=[],
        sender=sender, subject=subject, body=body, attachments=attachments,
        is_whitelisted=False, is_blacklisted=False,
        spam_score=0.0, content_category="",
        is_malicious_attachment=False, is_quarantined=False,
        human_decision="", classification="",
    )

    pprint(initial_state)
    print("\nProcessing...\n")

    # process_email mutates and returns the same dict, so the two steps chain.
    state = evaluate_email(process_email(initial_state))

    print("\nFinal Processed State:")
    pprint(state)

INFO:root:Tool started with input: {'subject': "Congratulations! You've won a free iPhone!", 'body': 'Dear user,\n\nYou have been selected as a winner in our exclusive giveaway! Claim your free iPhone now by clicking the link below.\n\nHurry, this offer expires soon!\n\nBest regards,\nSpammy Promotions Team'}
INFO:root:Tool ended with output: 0.9
INFO:root:Tool started with input: {'files': ['offer.zip']}
INFO:root:Malicious file detected in attachments: ['offer.zip']
INFO:root:Tool ended with output: True


{'attachments': ['offer.zip'],
 'body': 'Dear user,\n'
         '\n'
         'You have been selected as a winner in our exclusive giveaway! Claim '
         'your free iPhone now by clicking the link below.\n'
         '\n'
         'Hurry, this offer expires soon!\n'
         '\n'
         'Best regards,\n'
         'Spammy Promotions Team',
 'classification': '',
 'content_category': '',
 'human_decision': '',
 'is_blacklisted': False,
 'is_malicious_attachment': False,
 'is_quarantined': False,
 'is_whitelisted': False,
 'messages': [],
 'sender': 'unknown@spamsite.com',
 'spam_score': 0.0,
 'subject': "Congratulations! You've won a free iPhone!"}

Processing...


Final Processed State:
{'attachments': ['offer.zip'],
 'body': 'Dear user,\n'
         '\n'
         'You have been selected as a winner in our exclusive giveaway! Claim '
         'your free iPhone now by clicking the link below.\n'
         '\n'
         'Hurry, this offer expires soon!\n'
         '\n'
         'Best r