In [None]:
# Install dependencies (run once per environment)
# NOTE(review): versions are unpinned, so a fresh environment gets whatever
# dspy / python-dotenv releases are current — TODO pin known-good versions.
%pip install -q dspy python-dotenv


In [None]:
# Basic imports and environment setup
import os
import dspy
from dotenv import load_dotenv
from dspy import History

# Load API keys from .env
load_dotenv()

# Surface a missing key right away instead of failing on the first LM call.
if not os.getenv("OPENAI_API_KEY"):
    print("WARNING: OPENAI_API_KEY is not set; add it to your environment or .env before running the agent.")

# Configure model provider (OpenAI-only, per LangGraph agent)
# gpt-5-mini is an OpenAI reasoning model: recent DSPy releases raise a
# ValueError from dspy.LM(...) unless temperature is 1.0 (or None) and
# max_tokens is large enough (>= 16000 at the time of writing), so the earlier
# temperature=0.2 / max_tokens=12000 settings cannot be used with this model.
lm = dspy.LM("openai/gpt-5-mini", api_key=os.getenv("OPENAI_API_KEY"), temperature=1.0, max_tokens=20000)

dspy.configure(lm=lm)

# Conversation memory (module-level; run_deep_research appends to it).
# Re-running this cell resets the conversation.
conversation_history = History(messages=[])

print("DSPy configured for Deep Research agent.")


In [None]:
# Utils: history sanitization and helpers
from dspy import History as _History
from datetime import datetime


def sanitize_history(history: _History) -> _History:
    """Return a new History containing only valid, non-empty chat messages.

    Rules applied to each entry of ``history.messages``:
    - Entries that are not dicts are dropped.
    - A dict with ``role`` in {"user", "assistant"} and a non-blank string
      ``content`` is kept as ``{"role", "content"}`` (any other keys are dropped).
    - Otherwise the entry is treated as a legacy turn: a non-blank
      ``user_message`` becomes a user message and a non-blank ``answer`` becomes
      an assistant message. When a legacy entry carries both, both are kept, in
      that order, so the assistant side of the turn is not lost.

    Args:
        history: Object exposing a ``messages`` list; a missing or ``None``
            attribute is treated as an empty conversation.

    Returns:
        A freshly constructed History; the input object is not modified.
    """
    new_messages = []
    for m in getattr(history, "messages", None) or []:
        if not isinstance(m, dict):
            continue

        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str) and content.strip():
            new_messages.append({"role": role, "content": content})
            continue

        # Legacy shape: one dict may hold both halves of a turn. Emit each half
        # that is present rather than stopping after the user message.
        user_text = m.get("user_message")
        if isinstance(user_text, str) and user_text.strip():
            new_messages.append({"role": "user", "content": user_text})
        answer_text = m.get("answer")
        if isinstance(answer_text, str) and answer_text.strip():
            new_messages.append({"role": "assistant", "content": answer_text})
    return _History(messages=new_messages)


def get_today_str() -> str:
    """Format the current local date as e.g. ``Mon Jan 5, 2026``.

    The day of the month is rendered without zero padding.
    """
    today = datetime.now()
    weekday_and_month = today.strftime("%a %b")
    year = today.strftime("%Y")
    return "{} {}, {}".format(weekday_and_month, today.day, year)


In [None]:
# Tools: ported from LangGraph (OpenAI-only research + think tool)
from typing import List, Literal


def openai_search(queries: List[str], max_results: int = 5, topic: Literal["general", "news"] = "general") -> str:
    """Generate research write-ups for each query from the model's own knowledge.

    Mirrors the LangGraph `openai_search` tool behavior at a high level.
    Queries are answered one after another (simple sequential loop for clarity).

    Args:
        queries: Query strings. Blank or non-string entries are skipped.
        max_results: Maximum number of leading entries of ``queries`` to
            process (``None`` means no cap; negative values are treated as 0).
        topic: ``"news"`` selects the current-events prompt; anything else uses
            the general research prompt.

    Returns:
        A single string with one ``--- RESEARCH RESULT: <query> ---`` section
        per answered query, or a short "No research results..." message when
        there was nothing to answer.
    """
    if not isinstance(queries, list) or not queries:
        return "No research results could be generated. Please provide queries."

    # A DSPy string signature must have the form "inputs -> outputs"; the bare
    # "answer" used previously is rejected when the predictor is constructed.
    # Build the predictor once instead of once per query.
    answer_query = dspy.Predict("question -> answer")

    # Keep slicing semantics for None, but never let a negative cap silently
    # drop the last query via queries[:-k].
    query_cap = None if max_results is None else max(0, int(max_results))

    results = []
    for q in queries[:query_cap]:
        if not isinstance(q, str) or not q.strip():
            continue  # nothing to research for an empty query
        if topic == "news":
            prompt = f"""Please provide a comprehensive research summary for the following current events query: "{q}"

Focus on recent developments and provide:
1. Key facts and developments
2. Timeline of important events
3. Current status and implications
4. Sources and references you know of

Be thorough and objective. Include dates where relevant."""
        else:
            prompt = f"""Please provide comprehensive information for the following research query: "{q}"

Structure your response to include:
1. Key facts and background information
2. Important details and context
3. Current understanding and implications
4. Any relevant examples or case studies

Be thorough and provide detailed, accurate information based on your knowledge."""
        # Single-turn call
        resp = answer_query(question=prompt)
        content = getattr(resp, "answer", "") if isinstance(resp, dspy.Prediction) else str(resp)
        results.append(f"--- RESEARCH RESULT: {q} ---\n{content}\n")

    if not results:
        return "No research results could be generated."
    return (f"OpenAI Research Results ({topic} focus):\n\n" + "\n\n".join(results)).strip()


def think_tool(reflection: str) -> str:
    """Acknowledge a planning/reflection note.

    Intended to be called after searches so the agent can analyze results and
    decide on next steps; the note is simply echoed back in the confirmation.
    """
    acknowledgement = "Reflection recorded: {}".format(reflection)
    return acknowledgement


In [None]:
# Signatures for each phase (mirroring LangGraph prompts)

# Signature for the clarification phase: from the conversation and the current
# date, the model reports whether a clarifying question is needed.
# NOTE(review): DSPy is documented to use a Signature's docstring as the task
# instructions sent to the LM — treat edits to the docstring as prompt changes.
class ClarifySignature(dspy.Signature):
    """
    Analyze user's messages and decide whether to ask a clarifying question.

    Return JSON-like outputs:
    - need_clarification: boolean
    - question: clarifying question to ask
    - verification: verification message when proceeding
    """
    # Inputs
    messages: dspy.History = dspy.InputField()  # conversation so far
    date: str = dspy.InputField()  # run_deep_research passes get_today_str() here

    # Outputs
    need_clarification: bool = dspy.OutputField()  # truthy => ask `question` and stop
    question: str = dspy.OutputField()  # clarifying question shown to the user
    verification: str = dspy.OutputField()  # message recorded when research proceeds


# Signature for the brief phase: condenses the conversation into the research
# brief that the query-generation and final-report phases receive.
# NOTE(review): the docstring is presumably used by DSPy as the LM
# instructions — treat edits to it as prompt changes.
class BriefSignature(dspy.Signature):
    """Turn messages into a structured research brief."""
    # Inputs
    messages: dspy.History = dspy.InputField()  # conversation so far
    date: str = dspy.InputField()  # run_deep_research passes get_today_str() here

    # Output
    research_brief: str = dspy.OutputField()  # free-text brief; caller strips whitespace


# Signature for the query-generation phase: turns the research brief into a
# newline-separated list of search queries.
# NOTE(review): the docstring is presumably used by DSPy as the LM
# instructions — treat edits to it as prompt changes.
class GenerateQueriesSignature(dspy.Signature):
    """Generate diverse, concrete queries from the brief."""
    # Inputs
    messages: dspy.History = dspy.InputField()  # conversation so far
    research_brief: str = dspy.InputField()  # output of the brief phase
    date: str = dspy.InputField()  # run_deep_research passes get_today_str() here
    num_queries: int = dspy.InputField()  # how many queries the caller wants

    # Output: plain text that the caller splits into lines (see _parse_queries).
    queries_text: str = dspy.OutputField(description="One query per line, no numbering")


# Signature for the final phase: writes the report from the brief, the
# conversation and the collected findings.
# NOTE(review): the docstring is presumably used by DSPy as the LM
# instructions — treat edits to it as prompt changes.
class FinalReportSignature(dspy.Signature):
    """Generate a concise, well-structured final report from findings."""
    # Inputs
    research_brief: str = dspy.InputField()  # output of the brief phase
    messages: dspy.History = dspy.InputField()  # conversation so far
    findings: str = dspy.InputField()  # run_deep_research passes the openai_search output
    date: str = dspy.InputField()  # run_deep_research passes get_today_str() here

    # Output
    final_report: str = dspy.OutputField()  # report text returned to the caller


In [None]:
# Simple phase predictors (useful to keep behavior explicit for interns)
# One dspy.Predict per pipeline phase, each bound to its Signature class.
# run_deep_research looks these up by name, so keep the variable names stable.

clarify_predict = dspy.Predict(ClarifySignature)  # phase 1: clarify
brief_predict = dspy.Predict(BriefSignature)  # phase 2: research brief
queries_predict = dspy.Predict(GenerateQueriesSignature)  # phase 3: query generation
final_report_predict = dspy.Predict(FinalReportSignature)  # phase 5: final report


In [None]:
# Orchestrator: clarify → brief → generate queries → run queries → final report

# Bounds the clarification phase of run_deep_research.
MAX_CLARIFICATION_ATTEMPTS = 3
# Default value of run_deep_research's `num_queries` parameter.
DEFAULT_NUM_QUERIES = 6


def _history_to_text(history: History) -> History:
    """Return the sanitized form of ``history``.

    Despite the name, the result is a History object rather than a string;
    this is a thin wrapper around ``sanitize_history``.
    """
    cleaned_history = sanitize_history(history)
    return cleaned_history


def _parse_queries(text: str, limit: int) -> list:
    lines = [q.strip("- ") for q in (text or "").splitlines() if q.strip()]
    return lines[: int(limit)]


def _count_pending_clarifications(messages: list) -> int:
    """Count clarifying questions asked since the last non-clarification assistant turn."""
    pending = 0
    for message in reversed(messages):
        if not isinstance(message, dict) or message.get("role") != "assistant":
            continue  # user turns do not reset the streak
        if message.get("kind") != "clarification":
            break  # a report/verification ends the streak
        pending += 1
    return pending


def run_deep_research(user_message: str, *, allow_clarification: bool = True, num_queries: int = DEFAULT_NUM_QUERIES) -> dict:
    """Run the clarify -> brief -> queries -> search -> report pipeline for one user turn.

    Inputs mirror LangGraph: user messages feed a multi-phase pipeline. The
    user message (and any assistant reply produced here) is appended to the
    module-level ``conversation_history``.

    Args:
        user_message: The user's request. Blank or non-string input is rejected
            without touching the history.
        allow_clarification: When true, the clarify phase may stop the run and
            return a question for the user instead of a report. At most
            ``MAX_CLARIFICATION_ATTEMPTS`` clarifying questions are asked in a
            row; after that the pipeline proceeds with what it has.
        num_queries: Number of search queries to request (values below 1 are
            treated as 1).

    Returns:
        A JSON-like dict whose ``status`` is one of:
        - ``"reject"``: invalid input; keys ``final_report`` (""), ``notes``.
        - ``"clarify"``: keys ``final_report`` ("") and ``question``.
        - ``"ok"``: keys ``final_report``, ``research_brief``, ``queries``,
          ``findings``.
    """
    if not isinstance(user_message, str) or not user_message.strip():
        return {"final_report": "", "status": "reject", "notes": []}

    query_budget = max(1, int(num_queries))

    # Update history
    conversation_history.messages.append({"role": "user", "content": user_message})
    # Snapshot used by every phase below; it is taken before any assistant
    # reply from this run is appended.
    safe_history = _history_to_text(conversation_history)
    date_str = get_today_str()

    # Phase 1: Clarify (optional)
    # The previous per-call counter always left its loop on the first pass, so
    # the attempt limit never applied. Count clarifying questions already asked
    # in this exchange instead, so the limit holds across calls.
    clarifications_asked = _count_pending_clarifications(conversation_history.messages)
    if allow_clarification and clarifications_asked < MAX_CLARIFICATION_ATTEMPTS:
        c = clarify_predict(messages=safe_history, date=date_str)
        need = getattr(c, "need_clarification", False)
        question = getattr(c, "question", "")
        verification = getattr(c, "verification", "")
        if need and question:
            # Ask user one clarifying question and stop here (mimic END).
            # "kind" is bookkeeping for the streak count; sanitize_history
            # keeps only role/content, so it never reaches the model.
            conversation_history.messages.append(
                {"role": "assistant", "content": question, "kind": "clarification"}
            )
            return {"final_report": "", "status": "clarify", "question": question}
        if verification:
            conversation_history.messages.append({"role": "assistant", "content": verification})

    # Phase 2: Brief
    b = brief_predict(messages=safe_history, date=date_str)
    research_brief = (getattr(b, "research_brief", "") or "").strip()

    # Phase 3: Generate queries
    q_pred = queries_predict(messages=safe_history, research_brief=research_brief, date=date_str, num_queries=query_budget)
    queries = _parse_queries(getattr(q_pred, "queries_text", ""), limit=query_budget)
    if not queries:
        # No usable queries came back: research the request itself rather than
        # handing a "no results" placeholder to the report phase as findings.
        queries = [user_message.strip()]

    # Phase 4: Run queries (OpenAI-only search)
    findings = openai_search(queries=queries, max_results=len(queries), topic="general")

    # Phase 5: Final report
    report = final_report_predict(research_brief=research_brief, messages=safe_history, findings=findings, date=date_str)
    final_report = getattr(report, "final_report", "") or ""

    # Append to history
    if final_report.strip():
        conversation_history.messages.append({"role": "assistant", "content": final_report})

    return {
        "final_report": final_report,
        "status": "ok",
        "research_brief": research_brief,
        "queries": queries,
        "findings": findings,
    }


In [None]:
# Examples / smoke tests
# Each example starts from an empty conversation: run_deep_research appends to
# the shared conversation_history, so without a reset the second example would
# inherit the first one's unanswered question, and re-running this cell would
# keep piling up turns.

conversation_history.messages.clear()
print("\n--- Clarify path (likely) ---")
resp = run_deep_research("I need a marketing plan")
print({k: resp[k] for k in resp if k in ("status", "question")})

conversation_history.messages.clear()
print("\n--- Direct report path ---")
resp2 = run_deep_research("Compare Tesla FSD approaches vs Waymo safety stack in 2024.")
# Show the first 120 characters of the report; other fields are printed whole.
resp2_preview = {
    key: (value[:120] + "...") if key == "final_report" and isinstance(value, str) else value
    for key, value in resp2.items()
    if key in ("status", "research_brief", "queries", "final_report")
}
print(resp2_preview)
