In [1]:
import json
import time
from pathlib import Path

from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_tavily import TavilySearch
from pydantic import BaseModel, Field
from typing_extensions import TypedDict

from prompt import INDEPENDENT_AGENT_PROMPT, WORKER_PROMPT, QUERY_SPLITTER_PROMPT


In [2]:
# Minimum relevance score a web-search hit must reach (compared with `>=` in
# parse_web_search_results) to be kept; lower-scored hits are discarded.
WEB_SEARCH_RELEVANCE_SCORE_THRESHOLD = 0.80

class IndependentAgentState(TypedDict):
    """State handed to `supervisor` / `independent_agent` for a single (sub-)query."""
    question: str  # query to research at this level
    model: ChatGoogleGenerativeAI  # chat model reused by every step of the run
    depth: int  # remaining split levels; supervisor only attempts a split while this is > 0
    breadth: int  # forwarded to the splitting prompt as its `limit` value

# Structured-output schema requested from the model in `independent_agent`.
# Documented with `#` comments (not a class docstring) so the schema that is
# handed to the model stays exactly as it was.
class IndependentAgentOutput(BaseModel):
    # Each entry is researched separately by a nested `supervisor` call.
    sub_queries: list[str] = Field(
        description="List of sub-queries to be answered independently"
    )

# Structured-output schema requested from the model in `worker`.
# Documented with `#` comments (not a class docstring) so the schema that is
# handed to the model stays exactly as it was.
class WorkerOutput(BaseModel):
    # `worker` returns this string as the report text for its query.
    answer: str = Field(
        description="The answer to the question"
    )

# Structured-output schema requested from the model in `can_split_into_subtasks`.
# Documented with `#` comments (not a class docstring) so the schema that is
# handed to the model stays exactly as it was.
class QuerySplitterOutput(BaseModel):
    # Returned as-is by `can_split_into_subtasks`; True routes the query to `independent_agent`.
    can_split: bool = Field(
        description="Whether the query can be split into sub-queries"
    )

# One web-search hit that passed the relevance filter in `parse_web_search_results`.
# A list of these objects is interpolated into the worker prompt, so the field
# names and values are visible to the model.
class WebSearchResult(BaseModel):
    cited_url: str = Field(description="The URL of the web page")  # taken from the hit's 'url' key
    content: str = Field(description="The content of the web page")  # taken from the hit's 'content' key
    score: float = Field(description="The relevance score of the web page")  # taken from the hit's 'score' key

In [3]:
def build_model(agentState, model_name: str = "gemini-2.5-pro"):
    """Create a Gemini chat model and store it on the agent state.

    Args:
        agentState: Mutable state mapping; its ``'model'`` entry is
            overwritten in place.
        model_name: Model identifier passed to ``ChatGoogleGenerativeAI``.
            Defaults to the value that used to be hard-coded, so existing
            callers are unaffected.

    Returns:
        None. The state is modified in place.
    """
    agentState['model'] = ChatGoogleGenerativeAI(model=model_name)

def sleep_thread(sleep_time: float = 1):
    """Block the current thread between model calls to stay under rate limits.

    Args:
        sleep_time: Number of seconds to pause. Defaults to 1, the delay that
            was previously hard-coded, so existing zero-argument calls behave
            the same.

    Returns:
        None. Progress is reported on stdout before and after the pause.
    """
    print(f"Sleeping for {sleep_time} seconds to avoid rate limits...")
    time.sleep(sleep_time)
    print("Woke Up!")

In [4]:
def can_split_into_subtasks(query: str, model: ChatGoogleGenerativeAI) -> bool:
    """Ask the model whether `query` should be broken into sub-queries.

    Builds a two-message prompt (system: QUERY_SPLITTER_PROMPT, human: the
    query), requests a `QuerySplitterOutput` from the model, pauses via
    `sleep_thread`, and returns the model's `can_split` verdict.
    """
    splitter_messages = [
        ("system", QUERY_SPLITTER_PROMPT),
        ("human", "Query: \n\n {query}"),
    ]
    splitter_chain = (
        ChatPromptTemplate.from_messages(splitter_messages)
        | model.with_structured_output(QuerySplitterOutput)
    )

    verdict: QuerySplitterOutput = splitter_chain.invoke({"query": query})
    # Throttle after every model call.
    sleep_thread()

    return verdict.can_split

In [5]:
def parse_web_search_results(web_search_result):
    """Turn a raw search response into relevance-filtered `WebSearchResult` objects.

    Args:
        web_search_result: Mapping returned by the search tool. When present,
            its ``'results'`` entry is iterated and each item is read through
            its ``'url'``, ``'content'`` and ``'score'`` keys.

    Returns:
        A list of `WebSearchResult`, in the original order, containing only
        the hits whose score is at least
        ``WEB_SEARCH_RELEVANCE_SCORE_THRESHOLD``. The list is empty when the
        response has no (or an empty/None) ``'results'`` entry.
    """
    # The response is not guaranteed to carry 'results' (e.g. a payload that
    # only describes an error), so a missing or None entry is treated as
    # "no hits" instead of raising KeyError / TypeError.
    # NOTE(review): confirm the exact failure payload of the search tool.
    raw_hits = web_search_result.get('results') or []

    return [
        WebSearchResult(
            cited_url=hit['url'], content=hit['content'], score=hit['score'])
        for hit in raw_hits
        if hit['score'] >= WEB_SEARCH_RELEVANCE_SCORE_THRESHOLD
    ]

def web_search(query):
    """Run a Tavily search (up to 5 results) for `query` and return the filtered hits.

    The raw tool response is passed through `parse_web_search_results`, whose
    return value is returned unchanged.
    """
    print(f"\n Performing Web Search for {query} \n")

    tavily = TavilySearch(max_results=5)
    return parse_web_search_results(tavily.invoke({"query": query}))

In [6]:
def supervisor(agentState: IndependentAgentState):
    """Route a query either to the splitting agent or straight to a worker.

    While `depth` is positive the model is asked whether the query can be
    split; a positive verdict delegates to `independent_agent`, anything else
    (including an exhausted depth budget) is answered by `worker`.

    Returns:
        The report string produced by whichever path handled the query.
    """
    query, model, current_depth = (
        agentState['question'], agentState['model'], agentState['depth'])
    print(f"Initiating Supervisor for '{query}'...")

    # Depth budget used up: skip the split check and answer directly.
    if current_depth <= 0:
        print(f"Depth Limit of {current_depth} reached")
        return worker(query, model)

    can_split = can_split_into_subtasks(query, model)
    print(f"Should agent split the query? => {can_split}")

    return independent_agent(agentState) if can_split else worker(query, model)


def independent_agent(agentState: IndependentAgentState):
    """Split the query into sub-queries, research each one, and merge the reports.

    The model is asked for an `IndependentAgentOutput` (a list of
    sub-queries). Each sub-query is then handed to `supervisor` with the depth
    reduced by one, and the individual reports are concatenated under a
    Markdown heading per sub-query.

    Args:
        agentState: Current state; `question`, `model`, `depth` and `breadth`
            are read from it. It is not modified.

    Returns:
        A Markdown string starting with ``# Research Report`` followed by one
        ``##`` section per researched sub-query.
    """
    query = agentState['question']
    model = agentState['model']
    breadth = agentState['breadth']
    print(f"Initiating Independent Agent for '{query}'...")

    model_with_structured_output = model.with_structured_output(IndependentAgentOutput)
    independent_agent_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", INDEPENDENT_AGENT_PROMPT),
            ("human", "Query: \n\n {query} \n\n, Limit: {limit}"),
        ]
    )

    independent_agent_retriever = independent_agent_prompt | model_with_structured_output

    independent_sub_tasks: IndependentAgentOutput = independent_agent_retriever.invoke(
        { "query": query, "limit": breadth }
    )
    sleep_thread()

    # The prompt only *asks* the model to respect the limit; nothing stops it
    # from returning more items. Truncate so the fan-out (and therefore the
    # number of downstream model/search calls) is actually bounded by breadth.
    # max(..., 0) keeps a negative breadth from turning into a "drop the last
    # N items" slice.
    sub_queries = independent_sub_tasks.sub_queries[:max(breadth, 0)]

    research_reports = ["# Research Report"]

    # Every sub-task runs one level deeper, so it gets one less split level.
    next_depth: int = agentState['depth'] - 1

    for sub_query in sub_queries:
        supervisor_agent_state: IndependentAgentState = {
            'question': sub_query,
            'depth': next_depth,
            'breadth': breadth,
            'model': model,
        }
        supervisor_response = supervisor(supervisor_agent_state)

        research_reports.append(
            " \n ## " + sub_query + "\n\n" + supervisor_response)

    return '\n\n'.join(research_reports)


def worker(query: str, model: ChatGoogleGenerativeAI):
    """Answer `query` directly: search the web, then let the model write the answer.

    The filtered search hits from `web_search` are interpolated into the
    prompt alongside the query, a `WorkerOutput` is requested from the model,
    and its `answer` string is returned after a throttling pause.
    """
    print(f"Researching for the Query => '{query}'...")
    search_hits = web_search(query)
    print("Web Search completed!")

    answer_chain = (
        ChatPromptTemplate.from_messages(
            [
                ("system", WORKER_PROMPT),
                ("human", "Query: \n\n {query} \n\n, Web Search Results: {web_search_results}"),
            ]
        )
        | model.with_structured_output(WorkerOutput)
    )
    prompt_inputs = {"query": query, "web_search_results": search_hits}
    structured_answer: WorkerOutput = answer_chain.invoke(prompt_inputs)

    # Throttle after the model call before reporting completion.
    sleep_thread()
    print("Research Done!")

    return structured_answer.answer


In [14]:
load_dotenv()  # Load environment variables from .env file

# Recursion budget for this run: how many split levels are allowed and the
# limit forwarded to the splitting prompt.
DEPTH = 2
BREADTH = 5

model = ChatGoogleGenerativeAI(model="gemini-2.5-pro")

# Root state for the top-level question; nested calls derive their own state from it.
root_state: IndependentAgentState = {
    "model": model,
    "question": 'What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?',
    "depth": DEPTH,
    "breadth": BREADTH,
}
response = supervisor(root_state)

Initiating Supervisor for 'What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?'...
Sleeping for 5 seconds to avoid rate limits...
Woke Up!
Should agent split the query? => True
Initiating Independent Agent for 'What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?'...
Sleeping for 5 seconds to avoid rate limits...
Woke Up!
Initiating Supervisor for 'What is the investment philosophy of Duan Yongping?'...
Sleeping for 5 seconds to avoid rate limits...
Woke Up!
Should agent split the query? => False
Researching for the Query => 'What is the investment philosophy of Duan Yongping?'...

 Performing Web Search for What is the investment philosophy of Duan Yongping? 

Sleeping for 5 seconds to avoid rate limits...
Woke Up!
Research Done!
Initiating Supervisor for 'What is the investment philosophy of Warren Buffett?'...
Sleeping for 5 seconds to avoid rate limits...
Woke Up!
Should agent split the query? => False
Res

In [15]:
# Persist the final report as Markdown.
report_path = Path("research_output/output3.md")
# Create the output folder on a fresh checkout instead of failing with FileNotFoundError.
report_path.parent.mkdir(parents=True, exist_ok=True)
# Explicit UTF-8: the report may contain non-ASCII text, and the platform
# default encoding is not guaranteed to be able to encode it.
with report_path.open("w", encoding="utf-8") as file_object:
    file_object.write(response)


In [11]:
# Build a 100-row JSONL scaffold (one JSON object per line, ids 1..100) with
# empty "prompt" / "article" fields.
# json.dumps is used instead of str(dict) + quote replacement so that the
# lines stay valid JSON once the fields hold real text (apostrophes, double
# quotes, None, booleans, ...). For the current empty values the output is
# byte-identical to the previous approach.
research_records = [
    {"id": ind + 1, "prompt": "", "article": ""}
    for ind in range(100)
]

research_file_outputs = '\n'.join(json.dumps(record) for record in research_records)

In [12]:
# Display the generated JSONL text to eyeball its format before writing it to disk.
research_file_outputs

'{"id": 1, "prompt": "", "article": ""}\n{"id": 2, "prompt": "", "article": ""}\n{"id": 3, "prompt": "", "article": ""}\n{"id": 4, "prompt": "", "article": ""}\n{"id": 5, "prompt": "", "article": ""}\n{"id": 6, "prompt": "", "article": ""}\n{"id": 7, "prompt": "", "article": ""}\n{"id": 8, "prompt": "", "article": ""}\n{"id": 9, "prompt": "", "article": ""}\n{"id": 10, "prompt": "", "article": ""}\n{"id": 11, "prompt": "", "article": ""}\n{"id": 12, "prompt": "", "article": ""}\n{"id": 13, "prompt": "", "article": ""}\n{"id": 14, "prompt": "", "article": ""}\n{"id": 15, "prompt": "", "article": ""}\n{"id": 16, "prompt": "", "article": ""}\n{"id": 17, "prompt": "", "article": ""}\n{"id": 18, "prompt": "", "article": ""}\n{"id": 19, "prompt": "", "article": ""}\n{"id": 20, "prompt": "", "article": ""}\n{"id": 21, "prompt": "", "article": ""}\n{"id": 22, "prompt": "", "article": ""}\n{"id": 23, "prompt": "", "article": ""}\n{"id": 24, "prompt": "", "article": ""}\n{"id": 25, "prompt": "",

In [13]:
# Write the JSONL scaffold to disk.
base_result_path = Path("research_output/base_result.jsonl")
# Create the output folder on a fresh checkout instead of failing with FileNotFoundError.
base_result_path.parent.mkdir(parents=True, exist_ok=True)
# Explicit UTF-8 so the file does not depend on the platform's default encoding.
with base_result_path.open("w", encoding="utf-8") as file_object:
    file_object.write(research_file_outputs)