In [None]:
import os
from getpass import getpass

# Never store a real API key in the notebook source. A key that is already
# exported in the environment is reused as-is; otherwise the user is prompted
# for one without echoing it to the screen.
if not os.environ.get("OPENAI_API_KEY"):
    _entered_key = getpass(
        "OpenAI API key (leave blank to load it from a .env file later): "
    ).strip()
    if _entered_key:
        # Only a non-empty value is stored: python-dotenv does not override
        # variables that already exist, so an empty placeholder would stop a
        # later load_dotenv() call from supplying the key.
        os.environ["OPENAI_API_KEY"] = _entered_key

In [7]:
pip install "langchain>=0.2.10" "langchain-openai>=0.1.7" "langchain-community>=0.2.0" faiss-cpu python-dotenv tiktoken


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
"""
Assignment 3 (Bonus): Agentic RAG on Email Corpus
"""

import os
import csv
import json
from dataclasses import dataclass
from typing import List, Dict, Any, Optional

from dotenv import load_dotenv

# --- LangChain / OpenAI imports for latest versions ---
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.tools import Tool
from langchain.agents import initialize_agent, AgentType
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate


# -------------------------------------------------------------------
# 0. CONFIG
# -------------------------------------------------------------------
# Loads OPENAI_API_KEY (and anything else defined) if you use a .env file.
load_dotenv()

# Chat model used for every LLM call; can be changed to gpt-4o-mini / gpt-4.1.
OPENAI_MODEL = "gpt-4.1-mini"
# Embedding model used when indexing the email corpus.
EMBED_MODEL = "text-embedding-3-small"

# Output folders (relative to the working directory), created up front.
DATA_DIR = "data"
RESULTS_DIR = "results"
for _output_dir in (DATA_DIR, RESULTS_DIR):
    os.makedirs(_output_dir, exist_ok=True)
del _output_dir


# -------------------------------------------------------------------
# 1. DATA SELECTION & PREPROCESSING
#    - Build small Enron-like corpus
#    - Save as CSV + JSONL (structured format)
# -------------------------------------------------------------------

@dataclass
class EmailRecord:
    """A single email from the corpus: identifier, text, and optional header metadata."""
    # Unique identifier; the synthetic corpus builder assigns "email_<index>".
    id: str
    # Subject line of the message.
    subject: str
    # Full message body as plain text.
    body: str
    # Sender address, when known.
    sender: Optional[str] = None
    # Send date as a string (the synthetic corpus uses YYYY-MM-DD), when known.
    date: Optional[str] = None


def build_synthetic_enron_corpus() -> List[EmailRecord]:
    """
    Construct a small, hard-coded "Enron-like" email corpus.

    The messages are defined in code (no external downloads) and imitate
    internal communication between trading, legal, risk, compliance, HR,
    and project teams. Each message receives a sequential id of the form
    ``email_<index>`` in definition order.

    Returns:
        The list of EmailRecord objects, in the order they are defined below.
    """

    # One tuple per message: (subject, body, sender, date).
    seed_messages = [
        (
            "California power trading strategy – weekly update",
            (
                "Team,\n\n"
                "Following up on last week's trading performance in California. "
                "We increased our positions around peak hours and saw improved margins. "
                "Please review the attached spreadsheet before tomorrow's risk meeting. "
                "We need to be prepared to justify our exposure levels.\n\n"
                "Thanks,\nJohn\n"
            ),
            "john.doe@enron.com",
            "2000-06-12",
        ),
        (
            "Legal review: FERC inquiry and documentation",
            (
                "Hi all,\n\n"
                "FERC has requested additional documentation regarding our "
                "power contracts in the Northwest region. Legal needs all "
                "deal confirmations and risk assessments by Friday. "
                "Failure to provide accurate records may result in penalties.\n\n"
                "Best,\nSusan (Legal)\n"
            ),
            "susan.legal@enron.com",
            "2000-08-03",
        ),
        (
            "New structured deal – approval workflow",
            (
                "Folks,\n\n"
                "We have a new structured gas deal with a large industrial client. "
                "The structure is complex and will require approvals from trading, "
                "risk, and accounting. Please review the term sheet and provide "
                "comments by end of day Wednesday.\n\n"
                "Regards,\nMichael\n"
            ),
            "michael.structures@enron.com",
            "2000-09-20",
        ),
        (
            "Concerns about mark-to-market assumptions",
            (
                "Team,\n\n"
                "I am worried that some of the mark-to-market assumptions "
                "we are using for long-dated contracts are too optimistic. "
                "If prices move against us, our reported earnings could be "
                "materially overstated. We should revisit the pricing curves.\n\n"
                "Sincerely,\nKaren\n"
            ),
            "karen.risk@enron.com",
            "2000-10-02",
        ),
        (
            "Holiday schedule and trading coverage",
            (
                "Hi everyone,\n\n"
                "Please send your preferred holiday dates so we can plan "
                "trading coverage for December. We must maintain minimum "
                "desk coverage at all times. Managers, confirm back-up "
                "plans for critical roles.\n\n"
                "Thanks,\nHR\n"
            ),
            "hr@enron.com",
            "2000-11-15",
        ),
        (
            "Discussion: possible conflict with accounting policies",
            (
                "Hi,\n\n"
                "During yesterday's discussion we identified that a few deals "
                "may not align perfectly with our current accounting policies. "
                "We need Accounting and Legal to jointly review them and "
                "document any exceptions or adjustments.\n\n"
                "Regards,\nDaniel\n"
            ),
            "daniel.finance@enron.com",
            "2000-12-01",
        ),
        (
            "Employee concerns about transparency and communication",
            (
                "Leadership,\n\n"
                "Some employees have raised concerns about transparency in "
                "how performance metrics are calculated and used for bonuses. "
                "There is a perception that targets shift without clear explanation. "
                "We should consider a town hall to address these concerns.\n\n"
                "Best,\nInternal Comms\n"
            ),
            "comms@enron.com",
            "2000-12-10",
        ),
        (
            "Urgent: counterparty credit downgrade",
            (
                "Risk Team,\n\n"
                "Our major counterparty NorthWest Power Co. has just been downgraded "
                "by the rating agencies. We need an urgent review of all open "
                "exposures with this counterparty and proposed mitigation steps "
                "by tomorrow morning.\n\n"
                "Thanks,\nCredit Risk\n"
            ),
            "credit.risk@enron.com",
            "2001-01-05",
        ),
        (
            "Recap of regulatory briefing",
            (
                "Hello team,\n\n"
                "Today's regulatory briefing highlighted increased scrutiny on "
                "trading practices and potential new reporting requirements. "
                "Compliance will circulate a summary memo. Please review and "
                "flag any processes that might need changes.\n\n"
                "Regards,\nCompliance\n"
            ),
            "compliance@enron.com",
            "2001-02-01",
        ),
        (
            "Project Falcon – status update",
            (
                "Hi project members,\n\n"
                "Project Falcon is moving into phase 2. We have completed the "
                "initial feasibility study and are now working on implementation "
                "plans for the risk system integration. We need input from IT, "
                "trading, and risk by next week.\n\n"
                "Thanks,\nProject Lead\n"
            ),
            "falcon.pm@enron.com",
            "2001-03-12",
        ),
    ]

    # Wrap every tuple in an EmailRecord, numbering them from zero.
    corpus: List[EmailRecord] = [
        EmailRecord(
            id=f"email_{position}",
            subject=subject,
            body=body,
            sender=sender,
            date=date,
        )
        for position, (subject, body, sender, date) in enumerate(seed_messages)
    ]

    print(f"Synthetic email corpus built with {len(corpus)} emails.")
    return corpus


def save_corpus_structured(emails: List[EmailRecord], out_dir: str = DATA_DIR):
    """
    Persist the email subset in two structured formats.

    Files written (overwritten if they already exist):
    - CSV  : <out_dir>/email_subset.csv   (header row + one row per email)
    - JSONL: <out_dir>/email_subset.jsonl (one JSON object per line)

    This satisfies the "structured format" requirement.

    Args:
        emails: Records to write, in output order.
        out_dir: Target directory; created if it does not exist yet.
    """
    # Create the directory on demand so any out_dir works, not only the
    # DATA_DIR that the config section creates at import time.
    os.makedirs(out_dir, exist_ok=True)

    csv_path = os.path.join(out_dir, "email_subset.csv")
    jsonl_path = os.path.join(out_dir, "email_subset.jsonl")

    # CSV: newline="" lets the csv module control line endings, which matters
    # because the email bodies contain embedded newlines.
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "subject", "body", "sender", "date"])
        # Missing sender/date are written as empty cells.
        writer.writerows(
            [e.id, e.subject, e.body, e.sender or "", e.date or ""] for e in emails
        )

    # JSONL: missing sender/date are kept as JSON null.
    with open(jsonl_path, "w", encoding="utf-8") as f:
        for e in emails:
            obj = {
                "id": e.id,
                "subject": e.subject,
                "body": e.body,
                "sender": e.sender,
                "date": e.date,
            }
            f.write(json.dumps(obj) + "\n")

    print(f"Saved structured dataset to:\n  {csv_path}\n  {jsonl_path}")


# -------------------------------------------------------------------
# 2. BUILD EMBEDDINGS & VECTOR STORE (RAG INDEX)
# -------------------------------------------------------------------

def build_vector_store(emails: List[EmailRecord]) -> FAISS:
    """
    Build the "RAG index": a FAISS vector store over the email corpus.

    Each email becomes one Document whose text is "Subject: <subject>" followed
    by the body; id, subject, sender and date are carried along as metadata.
    Embeddings come from OpenAIEmbeddings using EMBED_MODEL.

    Args:
        emails: Records to index.

    Returns:
        The populated FAISS vector store.
    """
    print("Building embeddings & FAISS vector store...")
    embedder = OpenAIEmbeddings(model=EMBED_MODEL)

    documents: List[Document] = [
        Document(
            page_content=f"Subject: {email.subject}\n\n{email.body}",
            metadata={
                "email_id": email.id,
                "subject": email.subject,
                "sender": email.sender,
                "date": email.date,
            },
        )
        for email in emails
    ]

    index = FAISS.from_documents(documents, embedder)
    print("Vector store ready with", len(documents), "documents.")
    return index


# -------------------------------------------------------------------
# 3. RAG TOOL + SUPPORTING TOOLS
#    - RAG Tool: enron_rag_search
#    - Summarizer Tool
#    - Sentiment / Tone Analyzer Tool
# -------------------------------------------------------------------

def make_rag_retrieval_tool(vectordb: FAISS, k: int = 5) -> Tool:
    """
    Build the RAG tool: given a query, return the most relevant emails as text.

    Args:
        vectordb: Vector store holding the indexed emails.
        k: Number of emails to retrieve per query (defaults to 5).

    Returns:
        A Tool named "enron_rag_search" whose function maps a query string to
        a formatted block of retrieved emails, or to "No relevant emails found."
        when nothing is retrieved.
    """
    retriever = vectordb.as_retriever(search_kwargs={"k": k})

    def _rag_search(query: str) -> str:
        # Retrievers are Runnables; invoke() replaces the deprecated
        # get_relevant_documents().
        docs = retriever.invoke(query)
        if not docs:
            return "No relevant emails found."

        blocks = []
        for i, d in enumerate(docs, start=1):
            meta = d.metadata
            block_lines = [
                f"[Email {i}]",
                f"ID: {meta.get('email_id')}",
                f"Subject: {meta.get('subject')}",
            ]
            # Sender and date are optional metadata; omit the line when absent.
            if meta.get("sender"):
                block_lines.append(f"From: {meta.get('sender')}")
            if meta.get("date"):
                block_lines.append(f"Date: {meta.get('date')}")
            block_lines.append("")  # blank line between header and content
            block_lines.append(d.page_content)
            block_lines.append("-" * 60)  # visual separator after each email
            blocks.append("\n".join(block_lines))

        return "\n\n".join(blocks)

    return Tool(
        name="enron_rag_search",
        func=_rag_search,
        description=(
            "Searches the (synthetic) Enron-like email corpus for information "
            "about projects, risks, legal issues, and internal communication. "
            "Input a natural-language query; returns relevant email snippets."
        ),
    )


def make_summarizer_tool(llm: ChatOpenAI) -> Tool:
    """
    Build the summarizer tool: condense email snippets into 3–5 bullet points.

    Args:
        llm: Chat model used to produce the summary.

    Returns:
        A Tool named "email_summarizer" whose function takes the raw email text
        and returns the model's bullet-list summary as a string.
    """
    prompt = PromptTemplate(
        input_variables=["emails_text"],
        template=(
            "You are summarizing internal corporate emails.\n"
            "Summarize the following email snippets into 3–5 bullet points.\n"
            "Focus on:\n"
            "- Who is involved\n"
            "- Main actions/decisions\n"
            "- Any risks, deadlines, or regulatory topics\n\n"
            "EMAIL SNIPPETS:\n{emails_text}\n\n"
            "Return only a bullet list."
        ),
    )
    # Runnable composition replaces the deprecated LLMChain / chain.run API.
    chain = prompt | llm

    def _summarize(emails_text: str) -> str:
        # The chat model returns a message object; the tool must return its text.
        return chain.invoke({"emails_text": emails_text}).content

    return Tool(
        name="email_summarizer",
        func=_summarize,
        description=(
            "Summarizes one or more email snippets into concise bullet points."
        ),
    )


def make_sentiment_tool(llm: ChatOpenAI) -> Tool:
    """
    Build the sentiment / tone analyzer tool.

    The model is asked to pick one tone label (calm, positive / collaborative,
    urgent / stressed, conflict / disagreement, neutral) and to explain it
    briefly, answering as JSON with keys 'tone' and 'explanation'.

    Args:
        llm: Chat model used to perform the classification.

    Returns:
        A Tool named "email_sentiment_analyzer" whose function takes the email
        text and returns the model's reply as a string. The reply is not
        parsed or validated as JSON here.
    """
    prompt = PromptTemplate(
        input_variables=["emails_text"],
        template=(
            "You are analyzing the tone of internal corporate emails.\n"
            "Given the email content, classify overall tone as one of:\n"
            "  - calm\n"
            "  - positive / collaborative\n"
            "  - urgent / stressed\n"
            "  - conflict / disagreement\n"
            "  - neutral\n\n"
            "Also provide a short explanation.\n\n"
            "EMAIL CONTENT:\n{emails_text}\n\n"
            "Return JSON with keys 'tone' and 'explanation'."
        ),
    )
    # Runnable composition replaces the deprecated LLMChain / chain.run API.
    chain = prompt | llm

    def _analyze(emails_text: str) -> str:
        # The chat model returns a message object; the tool must return its text.
        return chain.invoke({"emails_text": emails_text}).content

    return Tool(
        name="email_sentiment_analyzer",
        func=_analyze,
        description=(
            "Analyzes overall tone/mood of one or more emails. "
            "Returns JSON with tone label and explanation."
        ),
    )


# -------------------------------------------------------------------
# 4. LLM, MODEL-ONLY BASELINE, RAG-ONLY, AND AGENTIC WORKFLOW
# -------------------------------------------------------------------

def make_llm() -> ChatOpenAI:
    """
    Create the single chat model shared by the model-only baseline, the
    RAG-only pipeline, and the agent's tools.

    Returns:
        A ChatOpenAI instance configured with OPENAI_MODEL and temperature 0.1.
    """
    llm_settings = {"model": OPENAI_MODEL, "temperature": 0.1}
    return ChatOpenAI(**llm_settings)


def model_only_answer(llm: ChatOpenAI, question: str) -> str:
    """
    Baseline answer with no retrieval.

    The model receives only a system instruction (stating it cannot see the
    emails) and the question itself, so it has to rely on general knowledge.

    Args:
        llm: Chat model to query.
        question: The user question.

    Returns:
        The text content of the model's reply.
    """
    baseline_instructions = " ".join(
        (
            "You are an analyst answering questions about an Enron-like email corpus.",
            "You DO NOT have direct access to the actual emails, only general knowledge.",
            "If asked for specific details from particular emails, say that you cannot"
            " see the messages.",
        )
    )
    reply = llm.invoke(
        [
            {"role": "system", "content": baseline_instructions},
            {"role": "user", "content": question},
        ]
    )
    return reply.content


def rag_only_answer(llm: ChatOpenAI, vectordb: FAISS, question: str, k: int = 5) -> str:
    """
    Classic RAG: retrieve the top-k emails and stuff them into the prompt.
    No explicit tools, no agent decisions.

    Args:
        llm: Chat model that writes the answer.
        vectordb: Vector store holding the indexed emails.
        question: The user question; also used as the retrieval query.
        k: Number of emails to retrieve (defaults to 5).

    Returns:
        The text content of the model's reply.
    """
    retriever = vectordb.as_retriever(search_kwargs={"k": k})
    # Retrievers are Runnables; invoke() replaces the deprecated
    # get_relevant_documents().
    docs = retriever.invoke(question)

    # Number each snippet so the answer can refer to "Doc N".
    context_str = "".join(
        f"[Doc {i}]\n{d.page_content}\n\n" for i, d in enumerate(docs, start=1)
    )

    prompt = (
        "You are analyzing internal emails. Use ONLY the snippets below to answer. "
        "If the answer is not clearly supported by the snippets, say so.\n\n"
        f"EMAIL CONTEXT:\n{context_str}\n\n"
        f"QUESTION: {question}\n\n"
        "ANSWER:"
    )
    response = llm.invoke(prompt)
    return response.content


def make_agent_with_tools(llm: ChatOpenAI, tools: List[Tool]):
    """
    Build the agentic workflow.

    - Tools: RAG search, summarizer, sentiment analyzer.
    - The LLM decides when to call which tool (OpenAI function calling).

    Args:
        llm: Chat model that drives the agent.
        tools: Tools the agent may call; the system prompt refers to them by
            the names enron_rag_search, email_summarizer and
            email_sentiment_analyzer.

    Returns:
        The agent executor produced by initialize_agent.
    """
    # Imported locally so this function carries its own dependency.
    from langchain_core.messages import SystemMessage

    system_prompt = (
        "You help the user explore an Enron-like email corpus using tools.\n"
        "AVAILABLE TOOLS:\n"
        "1) enron_rag_search: search for relevant emails.\n"
        "2) email_summarizer: summarize email snippets.\n"
        "3) email_sentiment_analyzer: analyze tone of emails.\n\n"
        "STRATEGY:\n"
        "- When a question needs specific evidence, first call enron_rag_search.\n"
        "- If results are long or span multiple emails, call email_summarizer.\n"
        "- If the question is about mood, conflict, or trust, call "
        "email_sentiment_analyzer.\n"
        "- Ground your final answer in the snippets you see. Be concise and clear."
    )

    agent = initialize_agent(
        tools=tools,
        llm=llm,
        agent=AgentType.OPENAI_FUNCTIONS,  # function-calling agent
        verbose=True,
        handle_parsing_errors=True,
        # The OpenAI-functions agent expects a SystemMessage object here; a
        # bare string is not placed in the system role of the prompt.
        agent_kwargs={"system_message": SystemMessage(content=system_prompt)},
    )
    return agent


def agentic_answer(agent, question: str) -> str:
    """
    Ask the agent a single question and extract its final answer.

    Args:
        agent: Object exposing ``invoke``; called with ``{"input": question}``.
        question: The user question.

    Returns:
        For a dict result, the value under "output", else the value under
        "output_text", else the stringified dict. Any non-dict result is
        returned as its string form.
    """
    response = agent.invoke({"input": question})

    # Non-dict results carry no named fields, so fall back to their string form.
    if not isinstance(response, dict):
        return str(response)

    # Prefer "output", then "output_text"; the first key present wins.
    for answer_key in ("output", "output_text"):
        if answer_key in response:
            return response[answer_key]
    return str(response)


# -------------------------------------------------------------------
# 5. EVALUATION
#    - 3 realistic tasks
#    - Compare: model-only vs RAG vs Agent (RAG+tools)
# -------------------------------------------------------------------

def run_evaluation(llm: ChatOpenAI, vectordb: FAISS, agent):
    """
    Run three fixed questions through all three approaches and save the answers.

    For every question the model-only baseline, the RAG-only pipeline and the
    agent are queried in that order; each answer is printed and collected.
    The collected answers are written as JSON to
    RESULTS_DIR/enron_rag_evaluation_synthetic.json.

    Args:
        llm: Chat model used by the model-only and RAG-only approaches.
        vectordb: Vector store used by the RAG-only approach.
        agent: Agent used for the agentic approach.
    """
    questions = [
        "Who appears to be coordinating or leading a key project or initiative?",
        "Summarize any legal or regulatory issues that show up in these emails.",
        "Describe the tone of communication around risk and accounting concerns.",
    ]

    # (console heading, key in the result record, answer function), in run order.
    approaches = [
        ("\n[1] MODEL-ONLY (NO RAG)", "model_only", lambda q: model_only_answer(llm, q)),
        ("\n[2] RAG-ONLY", "rag_only", lambda q: rag_only_answer(llm, vectordb, q)),
        ("\n[3] AGENT (RAG + TOOLS)", "agent", lambda q: agentic_answer(agent, q)),
    ]

    separator = "=" * 80
    records: List[Dict[str, Any]] = []

    for question in questions:
        print(separator)
        print("QUESTION:", question)
        print(separator)

        record: Dict[str, Any] = {"question": question}
        for heading, record_key, answer_fn in approaches:
            print(heading)
            record[record_key] = answer_fn(question)
            print(record[record_key])

        records.append(record)

    report_path = os.path.join(RESULTS_DIR, "enron_rag_evaluation_synthetic.json")
    with open(report_path, "w", encoding="utf-8") as report_file:
        json.dump(records, report_file, indent=2)

    print("\nSaved evaluation results to", report_path)


# -------------------------------------------------------------------
# 6. MAIN – RUN THE FULL PIPELINE
# -------------------------------------------------------------------

def main():
    """
    Run the full pipeline end to end: build and save the corpus, index it,
    create the LLM and its tools, assemble the agent, and run the evaluation.
    """
    # Dataset: build the synthetic corpus and persist it as CSV + JSONL.
    corpus = build_synthetic_enron_corpus()
    save_corpus_structured(corpus, out_dir=DATA_DIR)

    # Retrieval index: embeddings stored in FAISS.
    index = build_vector_store(corpus)

    # Shared chat model and the three tools built on top of it / the index.
    chat_model = make_llm()
    toolbox = [
        make_rag_retrieval_tool(index),
        make_summarizer_tool(chat_model),
        make_sentiment_tool(chat_model),
    ]

    # Agentic RAG, then the side-by-side evaluation of all three approaches.
    tool_agent = make_agent_with_tools(chat_model, toolbox)
    run_evaluation(chat_model, index, tool_agent)


# Run the whole pipeline once when this cell executes (corpus -> index ->
# tools -> agent -> evaluation). This calls the OpenAI embedding and chat models.
main()


Synthetic email corpus built with 10 emails.
Saved structured dataset to:
  data/email_subset.csv
  data/email_subset.jsonl
Building embeddings & FAISS vector store...
Vector store ready with 10 documents.
QUESTION: Who appears to be coordinating or leading a key project or initiative?

[1] MODEL-ONLY (NO RAG)
I don't have access to the specific emails to identify who is coordinating or leading a particular project or initiative. However, in the Enron email corpus, individuals such as Jeff Skilling, Kenneth Lay, and Andrew Fastow often appear in leadership roles related to major projects and initiatives. If you have a particular project or time frame in mind, I can provide general information about typical leadership roles during that period.

[2] RAG-ONLY
The Project Lead appears to be coordinating or leading a key project or initiative, specifically Project Falcon, as indicated in Doc 1.

[3] AGENT (RAG + TOOLS)


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: 