# Goal

Create a deep research agent using a planner, a researcher and a writer

# Web

Use Tavily for web search, accessed through its hosted MCP server.

In [None]:
from pydantic_ai import Agent
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings
import os
from pydantic_ai import RunContext
from pydantic_ai.mcp import MCPServerStreamableHTTP
import logging
# Root logger at INFO so this notebook's own progress messages are shown.
logging.basicConfig(level=logging.INFO)
# Raise these library loggers to WARNING so their INFO/DEBUG records are
# dropped. Each logger is configured exactly once.
for _library_logger in ("pydantic", "httpx", "asyncio"):
    logging.getLogger(_library_logger).setLevel(logging.WARNING)
# Logger used by every log call in this notebook.
logger = logging.getLogger("web_deep_researcher")

class Settings(BaseSettings):
    """Runtime configuration read from environment variables and ``.env``.

    Fields without a default are required; creating ``Settings()`` fails
    validation when one of them is not provided.
    """

    # API key copied into ``os.environ`` by ``web_deep_researcher.__init__``.
    GOOGLE_API_KEY: str
    # Model identifier passed to every pydantic-ai ``Agent`` in this notebook.
    MODEL_NAME: str = "google-gla:gemini-2.5-pro"
    # Required, but not read by the code visible in this notebook section.
    CONTEXT7_API_KEY: str
    # API key embedded in the Tavily MCP server URL.
    TAVILY_API_KEY: str

    # pydantic v2 style configuration (replaces the deprecated inner
    # ``class Config``): ignore unknown variables and load values from ".env".
    model_config = {"extra": "ignore", "env_file": ".env"}
        
# Structured output of the planning agent (passed as its ``output_type``).
# A ``#`` comment is used instead of a class docstring so the schema generated
# from this model stays unchanged.
class PlanOutput(BaseModel):
    # The report outline; ``web_deep_researcher.plan`` returns this value.
    outline: str = Field(description="The outline of the report")
    
# Used only as the type parameter of ``RunContext`` in the
# ``get_human_in_the_loop`` tool signature; never instantiated in the code
# visible here.
class GetHumanInTheLoopInput(BaseModel):
    """Input for getting more information"""
    # NOTE(review): the tool's own argument is named ``question`` (singular);
    # this field is not read anywhere in the visible code.
    questions: str

# Return value of the ``get_human_in_the_loop`` tool.
class GetHumanInTheLoopOutput(BaseModel):
    """Response for getting more information"""
    # Text the user typed at the console prompt.
    answer: str
    
# Used only as the type parameter of ``RunContext`` in the ``get_plan`` tool
# signature; never instantiated in the code visible here.
class GetPlanInput(BaseModel):
    """Input for getting a plan"""
    # Same name as the ``get_plan`` tool argument.
    instructions_for_plan: str

# Structured output of the inner outline agent and return value of the
# ``get_plan`` tool.
class GetPlanOutput(BaseModel):
    """Response for getting a plan"""
    # The outline text produced by the outline agent.
    plan: str
    
# Structured output of the writer agent (passed as its ``output_type``).
# A ``#`` comment is used instead of a class docstring so the schema generated
# from this model stays unchanged.
class reportOutput(BaseModel):
    # Full report text; ``web_deep_researcher.get_report`` returns this value.
    report: str
    
# One researched section of the report; ``documentationOutput.sections`` is a
# list of these. A ``#`` comment is used instead of a class docstring so the
# schema generated from this model stays unchanged.
class Section(BaseModel):
    title: str = Field(description="The title/subtitle of the section")
    content: str = Field(description="The content of the section, that answers the title of the section")
    # References are a single free-form string, not a list.
    references: str = Field(description="The references of the section, that are used to answer the title of the section")

# Structured output of the research agent (passed as its ``output_type``).
# A ``#`` comment is used instead of a class docstring so the schema generated
# from this model stays unchanged.
class documentationOutput(BaseModel):
    # ``web_deep_researcher.get_information`` returns this list.
    sections: list[Section] = Field(description="The sections of the report, that are used to answer the outline")
    
class web_deep_researcher:
    """Three-stage research pipeline built from pydantic-ai agents.

    ``execute`` runs the stages in order:

    1. ``plan`` - a planning agent produces a report outline and may ask the
       user questions on the console.
    2. ``get_information`` - a research agent fills the outline using the
       Tavily MCP server.
    3. ``get_report`` - a writer agent turns outline and research into a report.
    """

    def __init__(self):
        """Load ``Settings`` and export the Google API key to the environment."""
        self.settings = Settings()
        # Presumably the Google model provider reads the key from the
        # environment - TODO confirm against the pydantic-ai provider docs.
        os.environ["GOOGLE_API_KEY"] = self.settings.GOOGLE_API_KEY

    async def execute(self, question: str) -> str:
        """Run the whole pipeline for ``question`` and return the report text.

        Args:
            question: The user's research question.

        Returns:
            The report produced by the writer agent.
        """
        # Create the plan
        logger.info("Creating the plan for the question: %s", question)
        outline = await self.plan(question)
        # Get the information
        logger.info("Getting the information for the plan: %s", outline)
        sections = await self.get_information(outline)
        # Get the report
        logger.info("Getting the report for the information: %s", sections)
        return await self.get_report(sections, outline)

    async def _run_logged(self, agent: Agent, prompt: str):
        """Run ``agent`` on ``prompt``, logging every node, and return its output.

        Args:
            agent: The agent to run.
            prompt: The user prompt for the run.

        Returns:
            The structured output of the finished run.

        Raises:
            RuntimeError: If the run ended without a final result.
        """
        async with agent.iter(prompt) as agent_run:
            async for node in agent_run:
                # Each node represents a step in the agent's execution
                logger.info("Node: %s", node)
            # Read the result from the run itself instead of collecting every
            # node and reaching into the last one.
            final_result = agent_run.result
        if final_result is None:
            raise RuntimeError("Agent run finished without producing a result")
        return final_result.output

    # Define the Planning Agent
    async def plan(self, question: str) -> str:
        """Create a report outline for ``question``.

        The planning agent has two tools: ``get_plan`` (delegates to a
        dedicated outline agent) and ``get_human_in_the_loop`` (asks the user
        on the console via ``input``).

        Args:
            question: The user's research question.

        Returns:
            The outline text from the planning agent's ``PlanOutput``.
        """
        # The tool names in this prompt must match the functions registered
        # below, otherwise the model is told to call tools that do not exist.
        planning_agent = Agent(
            model=self.settings.MODEL_NAME,
            system_prompt="""You are a planning assistant. Your Job is to create an outline for a report for an user. You have access to tools to help you answer questions.
1. Assess which tool you should use to answer the question.
2. Use get_human_in_the_loop to get user input in case of ambiguity. Provide your question.
3. Use get_plan to create a plan based on what you understand about what the user is asking for. The default outline is the following:
    - Summary 
    - Introduction
    - Sections and subsections
    - Conclusion
    - References
4. If you think the question is too complex or not relevant, first use get_human_in_the_loop to get user input. if user input is ambiguous, then respond with 'I don't know how to help you with that'.
5. After making the plan, use get_human_in_the_loop to ask the user if the plan looks good, if the user asks for changes, use get_plan with the changes made by the user. If the user is happy with the plan, respond with the final plan.""",
            output_type=PlanOutput
        )

        # Tools. The agent has no dependencies, so the context is
        # RunContext[None]. The docstrings give the model a description of
        # each tool.
        @planning_agent.tool
        async def get_plan(ctx: RunContext[None], instructions_for_plan: str) -> GetPlanOutput:
            """Create or revise the report outline.

            Args:
                instructions_for_plan: What the report should cover, including any changes requested by the user.
            """
            outline_agent = Agent(
                self.settings.MODEL_NAME,
                instructions="You are an experienced planner for a research project, you will be given a query from the user and you need to make an outline of the report to give the user about it, and what to search for in the web in order to create the report",
                output_type=GetPlanOutput,
            )
            outline_result = await outline_agent.run(instructions_for_plan)
            return GetPlanOutput(plan=outline_result.output.plan)

        @planning_agent.tool
        def get_human_in_the_loop(ctx: RunContext[None], question: str) -> GetHumanInTheLoopOutput:
            """Ask the user a question on the console and return their answer.

            Args:
                question: The question to show to the user.
            """
            user_answer = input(f"Please provide your input for the question > '{question}': ")
            return GetHumanInTheLoopOutput(answer=user_answer)

        # Run the Planning Agent
        plan_output = await self._run_logged(planning_agent, question)
        return plan_output.outline

    # Define the Research Agent
    async def get_information(self, plan: str) -> list[Section]:
        """Research every part of the outline with the Tavily MCP server.

        Args:
            plan: The report outline produced by ``plan``.

        Returns:
            The sections returned by the research agent.
        """
        tavily_server = MCPServerStreamableHTTP(
            url=f'https://mcp.tavily.com/mcp/?tavilyApiKey={self.settings.TAVILY_API_KEY}'
        )
        # The instructions name Tavily because it is the only toolset given to
        # this agent.
        mcp_agent = Agent(self.settings.MODEL_NAME, toolsets=[tavily_server],
            instructions="""You are a helpful AI assistant tasked with getting content for a comprehensive report based on a given outline. Your goal is to research and compile current information for each section and subsection of the outline using the Tavily MCP server. Follow these steps to complete the task:

                1. Review the provided outline

                2. For each section and subsection in the outline:
                a. Use the Tavily MCP server to search for current information related to the topic.
                b. Review the search results and identify the most relevant and reliable sources.
                c. After fetching the content, analyze and summarize the information relevant to the outline section.

                3. Write the report section by section, following the structure of the outline. For each section:
                a. Provide a brief introduction to the topic.
                b. Include relevant information gathered from your research.
                c. Ensure that the content is well-organized and flows logically.
                d. Use appropriate transitions between subsections and main sections.

                4. Throughout the writing process, keep track of the sources you've used. For each source, note:
                - The title of the webpage or article
                - The author (if available)
                - The website name
                - The URL
                - The date you accessed the information
                """,
                    output_type=documentationOutput
            )
        # Run the Research Agent inside the agent's own context manager, as the
        # original code does, so the MCP toolset is set up around the run.
        async with mcp_agent:
            research_output = await self._run_logged(
                mcp_agent, f"The outline of the report is: {plan}"
            )
        return research_output.sections

    # Run the Writer Agent
    async def get_report(self, information: list[Section], plan: str) -> str:
        """Write the final report from the outline and the researched sections.

        Args:
            information: The sections returned by ``get_information``.
            plan: The report outline.

        Returns:
            The report text from the writer agent's ``reportOutput``.
        """
        writer_agent = Agent(
            self.settings.MODEL_NAME,
            instructions="You are a writer, you write a report about a given outline, you use the information provided as well as your own knowledge to write the report",
            output_type=reportOutput,
        )
        # ``information`` is interpolated as the repr of the Section list.
        writer_result = await writer_agent.run(f"""Write a report about the following outline: {plan}
        A Researcher has found the following information for each section: {information}
        Please use this to create a comprehesive report""")
        return writer_result.output.report

In [4]:
web_deep_researcher = web_deep_researcher()
result = await web_deep_researcher.execute("how can I use pydantic-ai")

INFO:web_deep_researcher:Creating the plan for the question: how can I use pydantic-ai
INFO:web_deep_researcher:Node: UserPromptNode(user_prompt='how can I use pydantic-ai', instructions=None, instructions_functions=[], system_prompts=("You are a planning assistant. Your Job is to create an outline for a report for an user. You have access to tools to help you answer questions.\n        1. Assess which tool you should use to answer the question.\n        2. Use get_human_in_the_loop to get user input in case of ambiguity. Provide your question.\n        3. Use create_plan to create a plan based on what you understand about what the user is asking for. The default outline is the following:\n            - Summary \n            - Introduction\n            - Sections and subsections\n            - Conclusion\n            - References\n        4. If you think the question is too complex or not relevant, first use get_human_in_the_loop to get user input. if user input is ambiguous, then resp

In [6]:
print(result)

Report Outline: Pydantic in AI Applications

**1. Summary**
Pydantic has emerged as a crucial library in modern AI development, primarily by providing robust data validation and structuring capabilities. Its integration offers key benefits such as improved type safety, reduced debugging time, and enhanced reliability of AI systems. Pydantic facilitates the creation of predictable data flows, which is essential for complex AI/ML workflows. Practical implications span various use cases, including the validation of inputs and outputs for AI models, enforcing structured responses from Large Language Models (LLMs), defining tools for AI agents, and establishing clear data schemas for MLOps pipelines.

**2. Introduction**
This report explores the indispensable role of Pydantic in shaping the landscape of modern AI applications, addressing critical data management and workflow challenges.

**2.1 What is Pydantic?**
Pydantic is a powerful Python library designed for data validation and setting