# Building a Research Agent with Pydantic AI.

In [None]:
# Quietly (-q) install everything the notebook imports: the agent framework,
# nest_asyncio, devtools (for `debug`), and the two search back ends shown
# below. Only one of the two search packages is actually required.
!pip -q install pydantic-ai
!pip -q install nest_asyncio
!pip -q install devtools
!pip -q install duckduckgo-search
!pip -q install tavily-python

### **Restart the notebook**

You will need to restart the notebook after you've installed Pydantic AI and the other dependencies above.

In [None]:
import os
from google.colab import userdata
from IPython.display import display, Markdown

# Copy API keys from Colab's secret store into environment variables.
# These lookups are required: a missing secret fails loudly right here.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
os.environ["GEMINI_API_KEY"] = userdata.get('GOOGLE_AI_STUDIO')
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

# Tavily is optional: if you don't have this key, use the DDGS alternative
# below. Only this lookup is best-effort, so the cell no longer aborts when
# the secret is absent.
try:
    os.environ["TAVILY_API_KEY"] = userdata.get('TAVILY_API_KEY')
except userdata.SecretNotFoundError:
    print("TAVILY_API_KEY not found in Colab secrets - skip the Tavily cells and use DDGS instead.")

In [None]:
# Patch asyncio so event loops can be nested. Presumably needed because the
# notebook kernel already runs a loop while the agent code starts its own -
# confirm for your environment.
import nest_asyncio
nest_asyncio.apply()

## Setting Up Search: Tavily & DDGS

Both Tavily search and DuckDuckGo search are set up below. Use DuckDuckGo search if you want a free alternative; Tavily requires an API key, but that key also comes with a number of free calls. Unfortunately, DuckDuckGo search nowadays often hits rate limits unless you go through a proxy.

No need to use both, just using one of these is fine.

### DuckDuckGo Search

In [None]:
from duckduckgo_search import DDGS, AsyncDDGS

# Blocking DuckDuckGo text search: request at most five hits and print them.
search_terms = "python programming"
results = DDGS().text(search_terms, max_results=5)
print(results)

In [None]:
results = AsyncDDGS().text("python programming", max_results=5)
print(results)

In [None]:
results = await AsyncDDGS().achat('describe the characteristic habits and behaviors of humans as a species')
print(results)

### Tavily Search

In [None]:
from tavily import TavilyClient, AsyncTavilyClient

# Setup the Tavily Client
tavily_client = AsyncTavilyClient(api_key=os.environ["TAVILY_API_KEY"])

# Simple Search
response = await tavily_client.search("Who is Leo Messi?", max_results=3)

print(response['results'])

In [None]:
# Walk the search hits: title, then content, then blank lines as a separator.
for hit in response['results']:
    print(hit['title'], hit['content'], '\n', sep='\n')

In [None]:
# RAG Context search

# Context Search
context = await tavily_client.get_search_context(query="Who is Leo Messi?", max_results=3)

print(context)

## Setting up the Agent & Tools

In [None]:
from __future__ import annotations as _annotations

import asyncio
import os
from dataclasses import dataclass
from typing import Any

from devtools import debug
from httpx import AsyncClient
import datetime

from pydantic_ai import Agent, ModelRetry, RunContext
from pydantic import BaseModel, Field

In [None]:
@dataclass
class SearchDataclass:
    """Dependencies passed to the agent runs and read by the search tool."""

    max_results: int  # result limit forwarded to each search call in get_search
    todays_date: str  # today's date as text; the notebook builds it with "%Y-%m-%d"

@dataclass
class ResearchDependencies:
    """Minimal dependency container that exposes only the date.

    NOTE(review): the runs in this notebook pass a SearchDataclass instance
    as `deps`, which carries the same `todays_date` field plus `max_results`.
    """

    todays_date: str  # today's date as text, read by the date system-prompt hook

# Structured output the agent is asked to produce (passed as `result_type`).
# Documented with comments rather than a class docstring on purpose: a
# docstring on a pydantic model presumably ends up in its schema description,
# which would change what the model is told - TODO confirm.
class ResearchResult(BaseModel):
    # Heading line of the report.
    research_title: str = Field(description='This is a top level Markdown heading that covers the topic of the query and answer prefix it with #')
    # Main body of the report.
    research_main: str = Field(description='This is a main section that provides answers for the query and research')
    # Bullet-point summary of the report.
    research_bullets: str = Field(description='This is a set of bulletpoints that summarize the answers for query')

In [None]:
## Make the agent
# deps_type declares the dependency object injected into tools and prompt
# hooks. It is SearchDataclass because that is what the search tool's
# RunContext is parameterized with and what the notebook passes as `deps=`;
# SearchDataclass also carries the `todays_date` field the date hook reads.
# result_type makes every run return a validated ResearchResult.
search_agent = Agent('openai:gpt-4o',
                     deps_type=SearchDataclass,
                     result_type=ResearchResult,
                     system_prompt='Your a helpful research assistant, you are an expert in research '
                     'If you are given a question you write strong keywords to do 3-5 searches in total '
                     '(each with a query_number) and then combine the results')


In [None]:
@search_agent.tool #Tavily
async def get_search(search_data:RunContext[SearchDataclass], query: str, query_number: int) -> str:
    """Get the search for a keyword query.

    Args:
        query: keywords to search.
        query_number: sequence number of this search within the current
            question; used only to label the progress output.
    """
    # Progress trace so each search the agent issues is visible in the cell output.
    print(f"Search query {query_number}: {query}")
    # The per-search result limit comes from the run's dependencies.
    max_results = search_data.deps.max_results
    # get_search_context hands back the search context as a single string,
    # which is why the return annotation is `str` rather than a dict.
    results = await tavily_client.get_search_context(query=query, max_results=max_results)

    return results

#### Uncomment this for the DDGS version

In [None]:
# @search_agent.tool # DDGS
# async def get_search(search_data:RunContext[SearchDataclass],query: str) -> dict[str, Any]:
#     """Get the search for a keyword query.

#     Args:
#         query: keywords to search.
#     """
#     print(f"Search query: {query}")
#     max_results = search_data.deps.max_results
#     results = await AsyncDDGS(proxy=None).atext(query, max_results=max_results)

#     return results

In [None]:
## set up the dependencies

# Today's date, rendered in YYYY-MM-DD form.
current_date = datetime.date.today()
date_string = current_date.isoformat()

# Dependencies for the agent runs: three results per search plus the date.
deps = SearchDataclass(todays_date=date_string, max_results=3)

In [None]:
result = await search_agent.run('can you give me a very detailed bio of Sam Altman?', deps=deps)

In [None]:
print(result.data)

In [None]:
result.data.research_title = '#' + result.data.research_title
print(result.data.research_title)

In [None]:
print(result.data.research_main)

In [None]:
print(result.data.research_bullets)

In [None]:
combined_markdown = "\n\n".join([result.data.research_title, result.data.research_main, result.data.research_bullets])

Markdown(combined_markdown)

In [None]:
debug(result)

In [None]:
# Second run with a time-sensitive question. It is issued before the
# "Add in the date" section registers the date-aware system prompt.
result = await search_agent.run('What is the latest AI News?', deps=deps)

In [None]:
# Show the structured result as returned.
print(result.data)

## Add in the date

In [None]:
@search_agent.system_prompt
async def add_current_date(ctx: RunContext[ResearchDependencies]) -> str:
    """Build the research-assistant system prompt with today's date appended.

    Registered on ``search_agent`` through the ``system_prompt`` decorator.

    Args:
        ctx: Run context; only ``ctx.deps.todays_date`` is read.

    Returns:
        The instruction text followed by a sentence stating today's date.
    """
    todays_date = ctx.deps.todays_date
    # Adjacent string literals instead of backslash continuations inside one
    # literal: the continuations embedded each source line's indentation (long
    # runs of spaces) in the prompt text.
    system_prompt = (
        'Your a helpful research assistant, you are an expert in research '
        'If you are given a question you write strong keywords to do 3-5 searches in total '
        '(each with a query_number) and then combine the results '
        f'if you need todays date it is {todays_date}'
    )
    return system_prompt

In [None]:
result = await search_agent.run('What are the major AI News announcements in the last few days?', deps=deps)

In [None]:
print(result.data)

In [None]:
result.data.research_title = '#' + result.data.research_title
print(result.data.research_title)

In [None]:
print(result.data.research_main)

In [None]:
print(result.data.research_bullets)

In [None]:
combined_markdown = "\n\n".join([result.data.research_title, result.data.research_main, result.data.research_bullets])

Markdown(combined_markdown)

In [None]:
debug(result)